1use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Small bit set recording per-token properties.
///
/// The default value has no bits set.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit marking a token whose word payload contains owned (non-source) text.
    const COOKED_TEXT: u8 = 0b01;
    /// Bit marking a token that was flagged as synthetic.
    const SYNTHETIC: u8 = 0b10;

    /// Flag set with no bits raised.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set with only the cooked-text bit raised.
    const fn cooked_text() -> Self {
        Self::empty().with(Self::COOKED_TEXT)
    }

    /// Returns a copy of `self` with the synthetic bit raised.
    pub(crate) const fn with_synthetic(self) -> Self {
        self.with(Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is raised.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.contains(Self::COOKED_TEXT)
    }

    /// Whether the synthetic bit is raised.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.contains(Self::SYNTHETIC)
    }

    /// Returns a copy of `self` with `bit` additionally raised.
    const fn with(self, bit: u8) -> Self {
        Self(self.0 | bit)
    }

    /// Whether `bit` is raised in `self`.
    const fn contains(self, bit: u8) -> bool {
        (self.0 & bit) != 0
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43 Borrowed(&'a str),
44 Shared {
45 source: Arc<str>,
46 range: Range<usize>,
47 },
48 Owned(String),
49}
50
51impl TokenText<'_> {
52 pub(crate) fn as_str(&self) -> &str {
53 match self {
54 Self::Borrowed(text) => text,
55 Self::Shared { source, range } => &source[range.clone()],
56 Self::Owned(text) => text,
57 }
58 }
59
60 fn into_owned<'a>(self) -> TokenText<'a> {
61 match self {
62 Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63 Self::Shared { source, range } => TokenText::Shared { source, range },
64 Self::Owned(text) => TokenText::Owned(text),
65 }
66 }
67
68 fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69 match self {
70 Self::Borrowed(text) => span
71 .filter(|span| span.end.offset <= source.len())
72 .map_or_else(
73 || TokenText::Owned(text.to_string()),
74 |span| TokenText::Shared {
75 source: Arc::clone(source),
76 range: span.start.offset..span.end.offset,
77 },
78 ),
79 Self::Shared { source, range } => TokenText::Shared { source, range },
80 Self::Owned(text) => TokenText::Owned(text),
81 }
82 }
83}
84
/// Classifies one segment of a lexed word.
///
/// NOTE(review): the per-variant descriptions that mention quoting syntax are
/// inferred from the variant names — confirm against the word readers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexedWordSegmentKind {
    /// Segment kind chosen for `TokenKind::Word` tokens.
    Plain,
    /// Segment kind chosen for `TokenKind::LiteralWord` tokens
    /// (presumably `'...'` text).
    SingleQuoted,
    /// Presumably `$'...'` text — confirm.
    DollarSingleQuoted,
    /// Segment kind chosen for `TokenKind::QuotedWord` tokens
    /// (presumably `"..."` text).
    DoubleQuoted,
    /// Presumably `$"..."` text — confirm.
    DollarDoubleQuoted,
    /// Fallback segment kind for every other token kind.
    Composite,
}
101
102#[derive(Debug, Clone, PartialEq, Eq)]
104pub struct LexedWordSegment<'a> {
105 kind: LexedWordSegmentKind,
106 text: TokenText<'a>,
107 span: Option<Span>,
108 wrapper_span: Option<Span>,
109}
110
111impl<'a> LexedWordSegment<'a> {
112 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113 Self {
114 kind,
115 text: TokenText::Borrowed(text),
116 span,
117 wrapper_span: span,
118 }
119 }
120
121 fn borrowed_with_spans(
122 kind: LexedWordSegmentKind,
123 text: &'a str,
124 span: Option<Span>,
125 wrapper_span: Option<Span>,
126 ) -> Self {
127 Self {
128 kind,
129 text: TokenText::Borrowed(text),
130 span,
131 wrapper_span,
132 }
133 }
134
135 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136 Self {
137 kind,
138 text: TokenText::Owned(text),
139 span: None,
140 wrapper_span: None,
141 }
142 }
143
144 fn owned_with_spans(
145 kind: LexedWordSegmentKind,
146 text: String,
147 span: Option<Span>,
148 wrapper_span: Option<Span>,
149 ) -> Self {
150 Self {
151 kind,
152 text: TokenText::Owned(text),
153 span,
154 wrapper_span,
155 }
156 }
157
158 pub fn as_str(&self) -> &str {
160 self.text.as_str()
161 }
162
163 pub(crate) const fn text_is_source_backed(&self) -> bool {
164 matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165 }
166
167 pub const fn kind(&self) -> LexedWordSegmentKind {
169 self.kind
170 }
171
172 pub const fn span(&self) -> Option<Span> {
174 self.span
175 }
176
177 pub fn wrapper_span(&self) -> Option<Span> {
179 self.wrapper_span.or(self.span)
180 }
181
182 fn rebased(mut self, base: Position) -> Self {
183 self.span = self.span.map(|span| span.rebased(base));
184 self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185 self
186 }
187
188 fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189 LexedWordSegment {
190 kind: self.kind,
191 text: self.text.into_owned(),
192 span: self.span,
193 wrapper_span: self.wrapper_span,
194 }
195 }
196
197 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198 LexedWordSegment {
199 kind: self.kind,
200 text: self.text.into_shared(source, self.span),
201 span: self.span,
202 wrapper_span: self.wrapper_span,
203 }
204 }
205}
206
207#[derive(Debug, Clone, PartialEq, Eq)]
209pub struct LexedWord<'a> {
210 primary_segment: LexedWordSegment<'a>,
211 trailing_segments: Vec<LexedWordSegment<'a>>,
212}
213
214impl<'a> LexedWord<'a> {
215 fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216 Self {
217 primary_segment,
218 trailing_segments: Vec::new(),
219 }
220 }
221
222 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223 Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224 }
225
226 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227 Self::from_segment(LexedWordSegment::owned(kind, text))
228 }
229
230 fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231 self.trailing_segments.push(segment);
232 }
233
234 pub fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236 std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237 }
238
239 pub fn text(&self) -> Option<&str> {
241 self.single_segment().map(LexedWordSegment::as_str)
242 }
243
244 pub fn joined_text(&self) -> String {
246 let mut text = String::new();
247 for segment in self.segments() {
248 text.push_str(segment.as_str());
249 }
250 text
251 }
252
253 pub fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255 self.trailing_segments
256 .is_empty()
257 .then_some(&self.primary_segment)
258 }
259
260 fn has_cooked_text(&self) -> bool {
261 self.segments()
262 .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263 }
264
265 fn rebased(mut self, base: Position) -> Self {
266 self.primary_segment = self.primary_segment.rebased(base);
267 self.trailing_segments = self
268 .trailing_segments
269 .into_iter()
270 .map(|segment| segment.rebased(base))
271 .collect();
272 self
273 }
274
275 fn into_owned<'b>(self) -> LexedWord<'b> {
276 LexedWord {
277 primary_segment: self.primary_segment.into_owned(),
278 trailing_segments: self
279 .trailing_segments
280 .into_iter()
281 .map(LexedWordSegment::into_owned)
282 .collect(),
283 }
284 }
285
286 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287 LexedWord {
288 primary_segment: self.primary_segment.into_shared(source),
289 trailing_segments: self
290 .trailing_segments
291 .into_iter()
292 .map(|segment| segment.into_shared(source))
293 .collect(),
294 }
295 }
296}
297
/// Kinds of unterminated constructs the lexer can report.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    CommandSubstitution,
    BacktickSubstitution,
    SingleQuote,
    DoubleQuote,
}

impl LexerErrorKind {
    /// Fixed, human-readable description of the error kind.
    pub const fn message(self) -> &'static str {
        let text = match self {
            Self::SingleQuote => "unterminated single quote",
            Self::DoubleQuote => "unterminated double quote",
            Self::BacktickSubstitution => "unterminated backtick substitution",
            Self::CommandSubstitution => "unterminated command substitution",
        };
        text
    }
}
322
/// Extra data attached to a `LexedToken`, depending on what was lexed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// No extra data (used for punctuation and comment tokens).
    None,
    /// The segments of a word token.
    Word(LexedWord<'a>),
    /// A single file-descriptor number.
    Fd(i32),
    /// A pair of file-descriptor numbers, stored as (source, destination).
    FdPair(i32, i32),
    /// The reason an error token was produced.
    Error(LexerErrorKind),
}
331
332#[derive(Debug, Clone, PartialEq, Eq)]
334pub struct LexedToken<'a> {
335 pub kind: TokenKind,
337 pub span: Span,
339 pub(crate) flags: TokenFlags,
340 payload: TokenPayload<'a>,
341}
342
343impl<'a> LexedToken<'a> {
344 fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
345 match kind {
346 TokenKind::Word => LexedWordSegmentKind::Plain,
347 TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
348 TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
349 _ => LexedWordSegmentKind::Composite,
350 }
351 }
352
353 pub(crate) fn punctuation(kind: TokenKind) -> Self {
354 Self {
355 kind,
356 span: Span::new(),
357 flags: TokenFlags::empty(),
358 payload: TokenPayload::None,
359 }
360 }
361
362 fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
363 let flags = if word.has_cooked_text() {
364 TokenFlags::cooked_text()
365 } else {
366 TokenFlags::empty()
367 };
368
369 Self {
370 kind,
371 span: Span::new(),
372 flags,
373 payload: TokenPayload::Word(word),
374 }
375 }
376
377 fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
378 Self::with_word_payload(
379 kind,
380 LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
381 )
382 }
383
384 fn owned_word(kind: TokenKind, text: String) -> Self {
385 Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
386 }
387
388 fn comment() -> Self {
389 Self {
390 kind: TokenKind::Comment,
391 span: Span::new(),
392 flags: TokenFlags::empty(),
393 payload: TokenPayload::None,
394 }
395 }
396
397 fn fd(kind: TokenKind, fd: i32) -> Self {
398 Self {
399 kind,
400 span: Span::new(),
401 flags: TokenFlags::empty(),
402 payload: TokenPayload::Fd(fd),
403 }
404 }
405
406 fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
407 Self {
408 kind,
409 span: Span::new(),
410 flags: TokenFlags::empty(),
411 payload: TokenPayload::FdPair(src_fd, dst_fd),
412 }
413 }
414
415 fn error(kind: LexerErrorKind) -> Self {
416 Self {
417 kind: TokenKind::Error,
418 span: Span::new(),
419 flags: TokenFlags::empty(),
420 payload: TokenPayload::Error(kind),
421 }
422 }
423
424 pub(crate) fn with_span(mut self, span: Span) -> Self {
425 self.span = span;
426 self
427 }
428
429 pub(crate) fn rebased(mut self, base: Position) -> Self {
430 self.span = self.span.rebased(base);
431 self.payload = match self.payload {
432 TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
433 payload => payload,
434 };
435 self
436 }
437
438 pub(crate) fn with_synthetic_flag(mut self) -> Self {
439 self.flags = self.flags.with_synthetic();
440 self
441 }
442
443 pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
444 let payload = match self.payload {
445 TokenPayload::None => TokenPayload::None,
446 TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
447 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
448 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
449 TokenPayload::Error(kind) => TokenPayload::Error(kind),
450 };
451
452 LexedToken {
453 kind: self.kind,
454 span: self.span,
455 flags: self.flags,
456 payload,
457 }
458 }
459
460 pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
461 let payload = match self.payload {
462 TokenPayload::None => TokenPayload::None,
463 TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
464 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
465 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
466 TokenPayload::Error(kind) => TokenPayload::Error(kind),
467 };
468
469 LexedToken {
470 kind: self.kind,
471 span: self.span,
472 flags: self.flags,
473 payload,
474 }
475 }
476
477 pub fn word_text(&self) -> Option<&str> {
479 self.kind
480 .is_word_like()
481 .then_some(())
482 .and_then(|_| match &self.payload {
483 TokenPayload::Word(word) => word.text(),
484 _ => None,
485 })
486 }
487
488 pub fn word_string(&self) -> Option<String> {
490 self.kind
491 .is_word_like()
492 .then_some(())
493 .and_then(|_| match &self.payload {
494 TokenPayload::Word(word) => Some(word.joined_text()),
495 _ => None,
496 })
497 }
498
499 pub fn word(&self) -> Option<&LexedWord<'a>> {
501 match &self.payload {
502 TokenPayload::Word(word) => Some(word),
503 _ => None,
504 }
505 }
506
507 pub fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
509 if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
510 return None;
511 }
512
513 (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
514 .then(|| &source[self.span.start.offset..self.span.end.offset])
515 }
516
517 pub fn fd_value(&self) -> Option<i32> {
519 match self.payload {
520 TokenPayload::Fd(fd) => Some(fd),
521 _ => None,
522 }
523 }
524
525 pub fn fd_pair_value(&self) -> Option<(i32, i32)> {
527 match self.payload {
528 TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
529 _ => None,
530 }
531 }
532
533 pub fn error_kind(&self) -> Option<LexerErrorKind> {
535 match self.payload {
536 TokenPayload::Error(kind) => Some(kind),
537 _ => None,
538 }
539 }
540}
541
/// Text read for a here-document together with a span for it.
///
/// NOTE(review): the producer is outside this chunk; field meanings are
/// inferred from their names — confirm.
#[derive(Debug, Clone, PartialEq)]
pub struct HeredocRead {
    /// The here-document content.
    pub content: String,
    /// Presumably the source span that `content` was read from — confirm.
    pub content_span: Span,
}
550
/// Depth limit handed to the lexer by the constructors that do not take an
/// explicit `max_depth` (stored as `Lexer::max_subst_depth`).
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
554
555#[derive(Clone, Debug)]
556struct Cursor<'a> {
557 rest: &'a str,
558}
559
560impl<'a> Cursor<'a> {
561 fn new(source: &'a str) -> Self {
562 Self { rest: source }
563 }
564
565 fn first(&self) -> Option<char> {
566 self.rest.chars().next()
567 }
568
569 fn second(&self) -> Option<char> {
570 let mut chars = self.rest.chars();
571 chars.next()?;
572 chars.next()
573 }
574
575 fn third(&self) -> Option<char> {
576 let mut chars = self.rest.chars();
577 chars.next()?;
578 chars.next()?;
579 chars.next()
580 }
581
582 fn bump(&mut self) -> Option<char> {
583 let ch = self.first()?;
584 self.rest = &self.rest[ch.len_utf8()..];
585 Some(ch)
586 }
587
588 fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
589 let start = self.rest;
590 let mut end = 0;
591
592 for ch in start.chars() {
593 if !predicate(ch) {
594 break;
595 }
596 end += ch.len_utf8();
597 }
598
599 self.rest = &start[end..];
600 &start[..end]
601 }
602
603 fn rest(&self) -> &'a str {
604 self.rest
605 }
606
607 fn skip_bytes(&mut self, count: usize) {
608 self.rest = &self.rest[count..];
609 }
610
611 fn find_byte(&self, byte: u8) -> Option<usize> {
612 memchr(byte, self.rest.as_bytes())
613 }
614}
615
/// Translates byte offsets in `source` into `Position` values.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    /// The text that offsets refer to.
    source: &'a str,
    /// Byte offset at which each line begins: `0`, then the offset just past
    /// every `\n` in `source`.
    line_starts: Vec<usize>,
    /// The most recently resolved position, reused as a starting point by
    /// `position`.
    cached: Position,
}
622
/// Counters collected by the lexer when the `benchmarking` feature is enabled.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of `Lexer::current_position` calls recorded while counters
    /// were enabled.
    pub(crate) current_position_calls: u64,
}
628
629impl<'a> PositionMap<'a> {
630 fn new(source: &'a str) -> Self {
631 let mut line_starts =
632 Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
633 line_starts.push(0);
634 line_starts.extend(
635 source
636 .bytes()
637 .enumerate()
638 .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
639 );
640
641 Self {
642 source,
643 line_starts,
644 cached: Position::new(),
645 }
646 }
647
648 fn position(&mut self, offset: usize) -> Position {
649 if offset == self.cached.offset {
650 return self.cached;
651 }
652
653 let position = if offset > self.cached.offset && offset <= self.source.len() {
654 Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
655 } else {
656 self.position_uncached(offset)
657 };
658 self.cached = position;
659 position
660 }
661
662 fn position_uncached(&self, offset: usize) -> Position {
663 let offset = offset.min(self.source.len());
664 let line_index = self
665 .line_starts
666 .partition_point(|start| *start <= offset)
667 .saturating_sub(1);
668 let line_start = self.line_starts[line_index];
669 let line_text = &self.source[line_start..offset];
670 let column = if line_text.is_ascii() {
671 line_text.len() + 1
672 } else {
673 line_text.chars().count() + 1
674 };
675
676 Position {
677 line: line_index + 1,
678 column,
679 offset,
680 }
681 }
682
683 fn advance_from(mut position: Position, text: &str) -> Position {
684 position.offset += text.len();
685 let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
686 if newline_count == 0 {
687 position.column += if text.is_ascii() {
688 text.len()
689 } else {
690 text.chars().count()
691 };
692 return position;
693 }
694
695 position.line += newline_count;
696 let tail_start = memrchr(b'\n', text.as_bytes())
697 .map(|index| index + 1)
698 .unwrap_or_default();
699 let tail = &text[tail_start..];
700 position.column = if tail.is_ascii() {
701 tail.len() + 1
702 } else {
703 tail.chars().count() + 1
704 };
705 position
706 }
707}
708
/// Lexer state over a borrowed input string.
#[derive(Clone)]
pub struct Lexer<'a> {
    /// The complete input text.
    // NOTE(review): `input` is read by several methods in this file, so this
    // `allow` looks unnecessary — confirm before removing.
    #[allow(dead_code)]
    input: &'a str,
    /// Current byte offset; advanced as characters or bytes are consumed.
    offset: usize,
    /// Cursor over the not-yet-consumed part of `input`.
    cursor: Cursor<'a>,
    /// Converts byte offsets into line/column positions.
    position_map: PositionMap<'a>,
    /// Characters queued for reading ahead of the cursor; `peek_char` and
    /// `advance` serve from this queue first.
    reinject_buf: VecDeque<char>,
    /// Offset to restore into `offset` once `reinject_buf` has drained
    /// (applied by `sync_offset_to_cursor`).
    reinject_resume_offset: Option<usize>,
    /// Depth limit supplied at construction time.
    max_subst_depth: usize,
    /// Zsh option state taken from the shell profile at construction, if any.
    initial_zsh_options: Option<ZshOptionState>,
    /// Offset-keyed zsh option states consulted by `current_zsh_options`, if any.
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    /// Number of timeline entries already passed by `offset`.
    zsh_timeline_index: usize,
    /// Benchmark counters; `None` until enabled.
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
731
732impl<'a> Lexer<'a> {
733 pub fn new(input: &'a str) -> Self {
735 Self::with_max_subst_depth_and_profile(
736 input,
737 DEFAULT_MAX_SUBST_DEPTH,
738 &ShellProfile::native(super::ShellDialect::Bash),
739 None,
740 )
741 }
742
743 pub fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
746 Self::with_max_subst_depth_and_profile(
747 input,
748 max_depth,
749 &ShellProfile::native(super::ShellDialect::Bash),
750 None,
751 )
752 }
753
754 pub fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
756 let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
757 .then(|| ZshOptionTimeline::build(input, shell_profile))
758 .flatten()
759 .map(Arc::new);
760 Self::with_max_subst_depth_and_profile(
761 input,
762 DEFAULT_MAX_SUBST_DEPTH,
763 shell_profile,
764 zsh_timeline,
765 )
766 }
767
768 pub(crate) fn with_max_subst_depth_and_profile(
769 input: &'a str,
770 max_depth: usize,
771 shell_profile: &ShellProfile,
772 zsh_timeline: Option<Arc<ZshOptionTimeline>>,
773 ) -> Self {
774 Self {
775 input,
776 offset: 0,
777 cursor: Cursor::new(input),
778 position_map: PositionMap::new(input),
779 reinject_buf: VecDeque::new(),
780 reinject_resume_offset: None,
781 max_subst_depth: max_depth,
782 initial_zsh_options: shell_profile.zsh_options().cloned(),
783 zsh_timeline,
784 zsh_timeline_index: 0,
785 #[cfg(feature = "benchmarking")]
786 benchmark_counters: None,
787 }
788 }
789
790 pub fn position(&self) -> Position {
792 self.position_map.position_uncached(self.offset)
793 }
794
795 fn current_position(&mut self) -> Position {
796 #[cfg(feature = "benchmarking")]
797 self.maybe_record_current_position_call();
798 self.position_map.position(self.offset)
799 }
800
801 #[cfg(feature = "benchmarking")]
802 pub(crate) fn enable_benchmark_counters(&mut self) {
803 self.benchmark_counters = Some(LexerBenchmarkCounters::default());
804 }
805
806 #[cfg(feature = "benchmarking")]
807 pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
808 self.benchmark_counters.unwrap_or_default()
809 }
810
811 #[cfg(feature = "benchmarking")]
812 fn maybe_record_current_position_call(&mut self) {
813 if let Some(counters) = &mut self.benchmark_counters {
814 counters.current_position_calls += 1;
815 }
816 }
817
818 fn sync_offset_to_cursor(&mut self) {
819 if self.reinject_buf.is_empty()
820 && let Some(offset) = self.reinject_resume_offset.take()
821 {
822 self.offset = offset;
823 }
824 }
825
826 pub fn next_token_kind(&mut self) -> Option<TokenKind> {
829 self.next_lexed_token().map(|token| token.kind)
830 }
831
832 fn peek_char(&mut self) -> Option<char> {
833 self.sync_offset_to_cursor();
834 if let Some(&ch) = self.reinject_buf.front() {
835 Some(ch)
836 } else {
837 self.cursor.first()
838 }
839 }
840
841 fn advance(&mut self) -> Option<char> {
842 self.sync_offset_to_cursor();
843 let ch = if !self.reinject_buf.is_empty() {
844 self.reinject_buf.pop_front()
845 } else {
846 self.cursor.bump()
847 };
848 if let Some(c) = ch {
849 self.offset += c.len_utf8();
850 }
851 ch
852 }
853
854 fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
855 self.reinject_buf
856 .iter()
857 .copied()
858 .chain(self.cursor.rest().chars())
859 }
860
861 fn second_char(&self) -> Option<char> {
862 match self.reinject_buf.len() {
863 0 => self.cursor.second(),
864 1 => self.cursor.first(),
865 _ => self.reinject_buf.get(1).copied(),
866 }
867 }
868
869 fn third_char(&self) -> Option<char> {
870 match self.reinject_buf.len() {
871 0 => self.cursor.third(),
872 1 => self.cursor.second(),
873 2 => self.cursor.first(),
874 _ => self.reinject_buf.get(2).copied(),
875 }
876 }
877
878 fn fourth_char(&self) -> Option<char> {
879 match self.reinject_buf.len() {
880 0 => self.cursor.rest().chars().nth(3),
881 1 => self.cursor.third(),
882 2 => self.cursor.second(),
883 3 => self.cursor.first(),
884 _ => self.reinject_buf.get(3).copied(),
885 }
886 }
887
    /// Consumes `byte_len` bytes directly from the source cursor and moves
    /// `offset` forward by the same amount.
    ///
    /// Debug builds assert that the reinjection queue is empty.
    fn consume_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        // Apply any pending resume offset before adding to `offset`.
        self.sync_offset_to_cursor();
        self.offset += byte_len;
        self.cursor.skip_bytes(byte_len);
    }
894
    /// Moves `offset` forward by `byte_len` without touching the cursor.
    ///
    /// Debug builds assert that the reinjection queue is empty.
    /// NOTE(review): presumably for callers that have already advanced the
    /// cursor themselves — confirm against call sites.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }
899
900 fn consume_ascii_chars(&mut self, count: usize) {
901 if self.reinject_buf.is_empty() {
902 self.consume_source_bytes(count);
903 return;
904 }
905
906 for _ in 0..count {
907 self.advance();
908 }
909 }
910
911 fn source_horizontal_whitespace_len(&self) -> usize {
912 self.cursor
913 .rest()
914 .as_bytes()
915 .iter()
916 .take_while(|byte| matches!(**byte, b' ' | b'\t'))
917 .count()
918 }
919
920 fn source_ascii_plain_word_len(&self) -> usize {
921 self.cursor
922 .rest()
923 .as_bytes()
924 .iter()
925 .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
926 .count()
927 }
928
929 fn find_double_quote_special(source: &str) -> Option<usize> {
930 source
931 .as_bytes()
932 .iter()
933 .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
934 }
935
936 fn ensure_capture_from_source(
937 &self,
938 capture: &mut Option<String>,
939 start: Position,
940 end: Position,
941 ) {
942 if capture.is_none() {
943 *capture = Some(self.input[start.offset..end.offset].to_string());
944 }
945 }
946
947 fn push_capture_char(capture: &mut Option<String>, ch: char) {
948 if let Some(text) = capture.as_mut() {
949 text.push(ch);
950 }
951 }
952
953 fn push_capture_str(capture: &mut Option<String>, text: &str) {
954 if let Some(current) = capture.as_mut() {
955 current.push_str(text);
956 }
957 }
958
959 fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
960 if let Some(timeline) = self.zsh_timeline.as_ref() {
961 while self.zsh_timeline_index < timeline.entries.len()
962 && timeline.entries[self.zsh_timeline_index].offset <= self.offset
963 {
964 self.zsh_timeline_index += 1;
965 }
966 return if self.zsh_timeline_index == 0 {
967 self.initial_zsh_options.as_ref()
968 } else {
969 Some(&timeline.entries[self.zsh_timeline_index - 1].state)
970 };
971 }
972
973 self.initial_zsh_options.as_ref()
974 }
975
976 fn comments_enabled(&mut self) -> bool {
977 !self
978 .current_zsh_options()
979 .is_some_and(|options| options.interactive_comments.is_definitely_off())
980 }
981
982 fn rc_quotes_enabled(&mut self) -> bool {
983 self.current_zsh_options()
984 .is_some_and(|options| options.rc_quotes.is_definitely_on())
985 }
986
987 fn ignore_braces_enabled(&mut self) -> bool {
988 self.current_zsh_options()
989 .is_some_and(|options| options.ignore_braces.is_definitely_on())
990 }
991
992 fn ignore_close_braces_enabled(&mut self) -> bool {
993 self.current_zsh_options().is_some_and(|options| {
994 options.ignore_braces.is_definitely_on()
995 || options.ignore_close_braces.is_definitely_on()
996 })
997 }
998
999 fn should_treat_hash_as_word_char(&mut self) -> bool {
1000 if !self.comments_enabled() {
1001 return true;
1002 }
1003 self.reinject_buf.is_empty()
1004 && (self
1005 .input
1006 .get(..self.offset)
1007 .and_then(|prefix| prefix.chars().next_back())
1008 .is_some_and(|prev| {
1009 !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1010 })
1011 || self.is_inside_unclosed_double_paren_on_line())
1012 }
1013
1014 fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1015 capture
1016 .as_deref()
1017 .unwrap_or(&self.input[start.offset..self.offset])
1018 }
1019
1020 fn current_word_surface_is_single_char(
1021 &self,
1022 start: Position,
1023 capture: &Option<String>,
1024 target: char,
1025 ) -> bool {
1026 let text = self.current_word_text(start, capture);
1027 if !text.contains('\x00') {
1028 let mut encoded = [0; 4];
1029 return text == target.encode_utf8(&mut encoded);
1030 }
1031
1032 let mut chars = text.chars().filter(|&ch| ch != '\x00');
1033 matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1034 }
1035
1036 fn current_word_surface_last_char<'b>(
1037 &'b self,
1038 start: Position,
1039 capture: &'b Option<String>,
1040 ) -> Option<char> {
1041 self.current_word_text(start, capture)
1042 .chars()
1043 .rev()
1044 .find(|&ch| ch != '\x00')
1045 }
1046
1047 fn current_word_surface_ends_with_char(
1048 &self,
1049 start: Position,
1050 capture: &Option<String>,
1051 target: char,
1052 ) -> bool {
1053 self.current_word_surface_last_char(start, capture) == Some(target)
1054 }
1055
1056 fn current_word_surface_ends_with_extglob_prefix(
1057 &self,
1058 start: Position,
1059 capture: &Option<String>,
1060 ) -> bool {
1061 self.current_word_surface_last_char(start, capture)
1062 .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1063 }
1064
1065 pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1067 self.skip_whitespace();
1068 let start = self.current_position();
1069 let token = self.next_lexed_token_inner(false)?;
1070 let end = self.current_position();
1071 Some(token.with_span(Span::from_positions(start, end)))
1072 }
1073
1074 pub fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1076 self.skip_whitespace();
1077 let start = self.current_position();
1078 let token = self.next_lexed_token_inner(true)?;
1079 let end = self.current_position();
1080 Some(token.with_span(Span::from_positions(start, end)))
1081 }
1082
1083 fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
1085 let ch = self.peek_char()?;
1086
1087 match ch {
1088 '\n' => {
1089 self.consume_ascii_chars(1);
1090 Some(LexedToken::punctuation(TokenKind::Newline))
1091 }
1092 ';' => {
1093 if self.second_char() == Some(';') {
1094 if self.third_char() == Some('&') {
1095 self.consume_ascii_chars(3);
1096 Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) } else {
1098 self.consume_ascii_chars(2);
1099 Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) }
1101 } else if self.second_char() == Some('|') {
1102 self.consume_ascii_chars(2);
1103 Some(LexedToken::punctuation(TokenKind::SemiPipe)) } else if self.second_char() == Some('&') {
1105 self.consume_ascii_chars(2);
1106 Some(LexedToken::punctuation(TokenKind::SemiAmp)) } else {
1108 self.consume_ascii_chars(1);
1109 Some(LexedToken::punctuation(TokenKind::Semicolon))
1110 }
1111 }
1112 '|' => {
1113 if self.second_char() == Some('|') {
1114 self.consume_ascii_chars(2);
1115 Some(LexedToken::punctuation(TokenKind::Or))
1116 } else if self.second_char() == Some('&') {
1117 self.consume_ascii_chars(2);
1118 Some(LexedToken::punctuation(TokenKind::PipeBoth))
1119 } else {
1120 self.consume_ascii_chars(1);
1121 Some(LexedToken::punctuation(TokenKind::Pipe))
1122 }
1123 }
1124 '&' => {
1125 if self.second_char() == Some('&') {
1126 self.consume_ascii_chars(2);
1127 Some(LexedToken::punctuation(TokenKind::And))
1128 } else if self.second_char() == Some('>') {
1129 if self.third_char() == Some('>') {
1130 self.consume_ascii_chars(3);
1131 Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
1132 } else {
1133 self.consume_ascii_chars(2);
1134 Some(LexedToken::punctuation(TokenKind::RedirectBoth))
1135 }
1136 } else if self.second_char() == Some('|') {
1137 self.consume_ascii_chars(2);
1138 Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
1139 } else if self.second_char() == Some('!') {
1140 self.consume_ascii_chars(2);
1141 Some(LexedToken::punctuation(TokenKind::BackgroundBang))
1142 } else {
1143 self.consume_ascii_chars(1);
1144 Some(LexedToken::punctuation(TokenKind::Background))
1145 }
1146 }
1147 '>' => {
1148 if self.second_char() == Some('>') {
1149 if self.third_char() == Some('|') {
1150 self.consume_ascii_chars(3);
1151 } else {
1152 self.consume_ascii_chars(2);
1153 }
1154 Some(LexedToken::punctuation(TokenKind::RedirectAppend))
1155 } else if self.second_char() == Some('|') {
1156 self.consume_ascii_chars(2);
1157 Some(LexedToken::punctuation(TokenKind::Clobber))
1158 } else if self.second_char() == Some('(') {
1159 self.consume_ascii_chars(2);
1160 Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
1161 } else if self.second_char() == Some('&') {
1162 self.consume_ascii_chars(2);
1163 Some(LexedToken::punctuation(TokenKind::DupOutput))
1164 } else {
1165 self.consume_ascii_chars(1);
1166 Some(LexedToken::punctuation(TokenKind::RedirectOut))
1167 }
1168 }
1169 '<' => {
1170 if self.second_char() == Some('<') {
1171 if self.third_char() == Some('<') {
1172 self.consume_ascii_chars(3);
1173 Some(LexedToken::punctuation(TokenKind::HereString))
1174 } else if self.third_char() == Some('-') {
1175 self.consume_ascii_chars(3);
1176 Some(LexedToken::punctuation(TokenKind::HereDocStrip))
1177 } else {
1178 self.consume_ascii_chars(2);
1179 Some(LexedToken::punctuation(TokenKind::HereDoc))
1180 }
1181 } else if self.second_char() == Some('>') {
1182 self.consume_ascii_chars(2);
1183 Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
1184 } else if self.second_char() == Some('(') {
1185 self.consume_ascii_chars(2);
1186 Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
1187 } else if self.second_char() == Some('&') {
1188 self.consume_ascii_chars(2);
1189 Some(LexedToken::punctuation(TokenKind::DupInput))
1190 } else {
1191 self.consume_ascii_chars(1);
1192 Some(LexedToken::punctuation(TokenKind::RedirectIn))
1193 }
1194 }
1195 '(' => {
1196 if self.second_char() == Some('(') {
1197 self.consume_ascii_chars(2);
1198 Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
1199 } else {
1200 self.consume_ascii_chars(1);
1201 Some(LexedToken::punctuation(TokenKind::LeftParen))
1202 }
1203 }
1204 ')' => {
1205 if self.second_char() == Some(')') {
1206 self.consume_ascii_chars(2);
1207 Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
1208 } else {
1209 self.consume_ascii_chars(1);
1210 Some(LexedToken::punctuation(TokenKind::RightParen))
1211 }
1212 }
1213 '{' => {
1214 if self.ignore_braces_enabled() {
1215 let start = self.current_position();
1216 self.consume_ascii_chars(1);
1217 match self.peek_char() {
1218 Some(' ') | Some('\t') | Some('\n') | None => {
1219 Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
1220 }
1221 _ => self.read_word_starting_with("{", start),
1222 }
1223 } else if self.looks_like_brace_expansion() {
1224 self.read_brace_expansion_word()
1228 } else if self.is_brace_group_start() {
1229 self.advance();
1230 Some(LexedToken::punctuation(TokenKind::LeftBrace))
1231 } else {
1232 self.read_brace_literal_word()
1234 }
1235 }
1236 '}' => {
1237 self.consume_ascii_chars(1);
1238 if self.ignore_close_braces_enabled() {
1239 Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
1240 } else {
1241 Some(LexedToken::punctuation(TokenKind::RightBrace))
1242 }
1243 }
1244 '[' => {
1245 let start = self.current_position();
1246 self.consume_ascii_chars(1);
1247 if self.peek_char() == Some('[')
1248 && matches!(
1249 self.second_char(),
1250 Some(' ') | Some('\t') | Some('\n') | None
1251 )
1252 {
1253 self.consume_ascii_chars(1);
1254 Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
1255 } else {
1256 match self.peek_char() {
1263 Some(' ') | Some('\t') | Some('\n') | None => {
1264 Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
1265 }
1266 _ => self.read_word_starting_with("[", start),
1267 }
1268 }
1269 }
1270 ']' => {
1271 if self.second_char() == Some(']') {
1272 self.consume_ascii_chars(2);
1273 Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
1274 } else {
1275 self.consume_ascii_chars(1);
1276 Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
1277 }
1278 }
1279 '\'' => self.read_single_quoted_string(),
1280 '"' => self.read_double_quoted_string(),
1281 '#' => {
1282 if self.should_treat_hash_as_word_char() {
1283 let start = self.current_position();
1284 return self.read_word_starting_with("#", start);
1285 }
1286 if preserve_comments {
1287 self.read_comment();
1288 Some(LexedToken::comment())
1289 } else {
1290 self.skip_comment();
1291 self.next_lexed_token_inner(false)
1292 }
1293 }
1294 '0'..='9' => self.read_word_or_fd_redirect(),
1296 _ => self.read_word(),
1297 }
1298 }
1299
1300 fn skip_whitespace(&mut self) {
1301 while let Some(ch) = self.peek_char() {
1302 if self.reinject_buf.is_empty() {
1303 let whitespace_len = self.source_horizontal_whitespace_len();
1304 if whitespace_len > 0 {
1305 self.consume_source_bytes(whitespace_len);
1306 continue;
1307 }
1308
1309 if self.cursor.rest().starts_with("\\\n") {
1310 self.consume_source_bytes(2);
1311 continue;
1312 }
1313 }
1314
1315 if ch == ' ' || ch == '\t' {
1316 self.consume_ascii_chars(1);
1317 } else if ch == '\\' {
1318 if self.second_char() == Some('\n') {
1320 self.consume_ascii_chars(2);
1321 } else {
1322 break;
1323 }
1324 } else {
1325 break;
1326 }
1327 }
1328 }
1329
1330 fn skip_comment(&mut self) {
1331 if self.reinject_buf.is_empty() {
1332 let end = self
1333 .cursor
1334 .find_byte(b'\n')
1335 .unwrap_or(self.cursor.rest().len());
1336 self.consume_source_bytes(end);
1337 return;
1338 }
1339
1340 while let Some(ch) = self.peek_char() {
1341 if ch == '\n' {
1342 break;
1343 }
1344 self.advance();
1345 }
1346 }
1347
1348 fn read_comment(&mut self) {
1349 debug_assert_eq!(self.peek_char(), Some('#'));
1350
1351 if self.reinject_buf.is_empty() {
1352 let rest = self.cursor.rest();
1353 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1354 self.consume_source_bytes(end);
1355 return;
1356 }
1357
1358 self.advance(); while let Some(ch) = self.peek_char() {
1361 if ch == '\n' {
1362 break;
1363 }
1364 self.advance();
1365 }
1366 }
1367
1368 fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1369 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1370 return false;
1371 }
1372
1373 let line_start = self.input[..self.offset]
1374 .rfind('\n')
1375 .map_or(0, |index| index + 1);
1376 let prefix = &self.input[line_start..self.offset];
1377 line_has_unclosed_double_paren(prefix)
1378 }
1379
1380 fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
1383 if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
1384 let fd: i32 = first_digit.to_digit(10).unwrap() as i32;
1385
1386 match (self.second_char(), self.third_char()) {
1387 (Some('>'), Some('>')) => {
1388 if self.fourth_char() == Some('|') {
1389 self.consume_ascii_chars(4);
1390 } else {
1391 self.consume_ascii_chars(3);
1392 }
1393 return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
1394 }
1395 (Some('>'), Some('|')) => {
1396 self.consume_ascii_chars(3);
1397 return Some(LexedToken::fd(TokenKind::Clobber, fd));
1398 }
1399 (Some('>'), Some('&')) => {
1400 self.consume_ascii_chars(3);
1401
1402 let mut target_str = String::with_capacity(4);
1403 while let Some(c) = self.peek_char() {
1404 if c.is_ascii_digit() {
1405 target_str.push(c);
1406 self.advance();
1407 } else {
1408 break;
1409 }
1410 }
1411
1412 if target_str.is_empty() {
1413 return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1414 }
1415
1416 let target_fd: i32 = target_str.parse().unwrap_or(1);
1417 return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
1418 }
1419 (Some('>'), _) => {
1420 self.consume_ascii_chars(2);
1421 return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1422 }
1423 (Some('<'), Some('&')) => {
1424 self.consume_ascii_chars(3);
1425
1426 let mut target_str = String::with_capacity(4);
1427 while let Some(c) = self.peek_char() {
1428 if c.is_ascii_digit() || c == '-' {
1429 target_str.push(c);
1430 self.advance();
1431 if c == '-' {
1432 break;
1433 }
1434 } else {
1435 break;
1436 }
1437 }
1438
1439 if target_str == "-" {
1440 return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
1441 }
1442 let target_fd: i32 = target_str.parse().unwrap_or(0);
1443 return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
1444 }
1445 (Some('<'), Some('>')) => {
1446 self.consume_ascii_chars(3);
1447 return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
1448 }
1449 (Some('<'), Some('<')) => {}
1450 (Some('<'), _) => {
1451 self.consume_ascii_chars(2);
1452 return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
1453 }
1454 _ => {}
1455 }
1456 }
1457
1458 self.read_word()
1460 }
1461
1462 fn read_word_starting_with(
1463 &mut self,
1464 _prefix: &str,
1465 start: Position,
1466 ) -> Option<LexedToken<'a>> {
1467 let segment = match self.read_unquoted_segment(start) {
1468 Ok(segment) => segment,
1469 Err(kind) => return Some(LexedToken::error(kind)),
1470 };
1471 if segment.as_str().is_empty() {
1472 return None;
1473 }
1474 let mut lexed_word = LexedWord::from_segment(segment);
1475 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1476 return Some(LexedToken::error(kind));
1477 }
1478 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1479 }
1480
1481 fn read_word(&mut self) -> Option<LexedToken<'a>> {
1482 let start = self.current_position();
1483
1484 if self.reinject_buf.is_empty() {
1485 let ascii_len = self.source_ascii_plain_word_len();
1486 let chunk = if ascii_len > 0
1487 && self
1488 .cursor
1489 .rest()
1490 .as_bytes()
1491 .get(ascii_len)
1492 .is_none_or(|byte| byte.is_ascii())
1493 {
1494 self.consume_source_bytes(ascii_len);
1495 &self.input[start.offset..self.offset]
1496 } else {
1497 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1498 self.advance_scanned_source_bytes(chunk.len());
1499 chunk
1500 };
1501 if !chunk.is_empty() {
1502 let continues = matches!(
1503 self.peek_char(),
1504 Some(next)
1505 if Self::is_word_char(next)
1506 || next == '$'
1507 || matches!(next, '\'' | '"')
1508 || next == '{'
1509 || (next == '('
1510 && (chunk.ends_with('=')
1511 || Self::word_can_take_parenthesized_suffix(chunk)))
1512 );
1513
1514 if !continues {
1515 let end = self.current_position();
1516 return Some(LexedToken::borrowed_word(
1517 TokenKind::Word,
1518 &self.input[start.offset..self.offset],
1519 Some(Span::from_positions(start, end)),
1520 ));
1521 }
1522
1523 if self.peek_char() == Some('(')
1524 && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
1525 {
1526 return self.read_complex_word(start);
1527 }
1528
1529 let end = self.current_position();
1530 return self.finish_segmented_word(LexedWord::borrowed(
1531 LexedWordSegmentKind::Plain,
1532 &self.input[start.offset..self.offset],
1533 Some(Span::from_positions(start, end)),
1534 ));
1535 }
1536 }
1537
1538 self.read_complex_word(start)
1539 }
1540
1541 fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1542 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1543 return Some(LexedToken::error(kind));
1544 }
1545
1546 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1547 }
1548
1549 fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1550 if self.peek_char() == Some('$') {
1551 match self.second_char() {
1552 Some('\'') => return self.read_dollar_single_quoted_string(),
1553 Some('"') => return self.read_dollar_double_quoted_string(),
1554 _ => {}
1555 }
1556 }
1557
1558 let segment = match self.read_unquoted_segment(start) {
1559 Ok(segment) => segment,
1560 Err(kind) => return Some(LexedToken::error(kind)),
1561 };
1562
1563 if segment.as_str().is_empty() {
1564 return None;
1565 }
1566
1567 self.finish_segmented_word(LexedWord::from_segment(segment))
1568 }
1569
    /// Reads one unquoted ("plain") word segment whose text begins at `start`.
    ///
    /// The loop consumes characters until it meets a quote, a mid-word `$'` /
    /// `$"`, or any character that none of the branches below accepts.
    ///
    /// When reinjected text is pending the segment text is accumulated in an
    /// owned `String`; otherwise `word` starts as `None` and, unless a branch
    /// switches to capturing, the result borrows
    /// `self.input[start.offset..self.offset]`.
    ///
    /// # Errors
    ///
    /// * `LexerErrorKind::CommandSubstitution` when a `$[`, `$((` or `$(`
    ///   construct reports failure.
    /// * `LexerErrorKind::BacktickSubstitution` when a backtick is never closed.
    fn read_unquoted_segment(
        &mut self,
        start: Position,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        // `Some` only while reinjected text is pending (or after a branch below
        // asks `ensure_capture_from_source` to start capturing).
        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        while let Some(ch) = self.peek_char() {
            if ch == '"' || ch == '\'' {
                // Quotes end the unquoted segment; the caller reads them.
                break;
            } else if ch == '$' {
                // `$'` / `$"` in the middle of a word ends this segment; at the
                // very start it is consumed as a plain `$` below.
                if matches!(self.second_char(), Some('\'') | Some('"'))
                    && (self.current_position().offset > start.offset
                        || word.as_ref().is_some_and(|word| !word.is_empty()))
                {
                    break;
                }

                self.advance();

                Self::push_capture_char(&mut word, ch); if self.peek_char() == Some('[') {
                    // `$[ ... ]`
                    Self::push_capture_char(&mut word, '[');
                    self.advance();
                    if !self.read_legacy_arithmetic_into(&mut word, start) {
                        return Err(LexerErrorKind::CommandSubstitution);
                    }
                } else if self.peek_char() == Some('(') {
                    if self.second_char() == Some('(') {
                        // `$(( ... ))`
                        if !self.read_arithmetic_expansion_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    } else {
                        // `$( ... )`
                        Self::push_capture_char(&mut word, '(');
                        self.advance();
                        if !self.read_command_subst_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    }
                } else if self.peek_char() == Some('{') {
                    // `${ ... }` — the helper's boolean result is ignored here.
                    Self::push_capture_char(&mut word, '{');
                    self.advance();
                    let _ = self.read_param_expansion_into(&mut word, start);
                } else {
                    if let Some(c) = self.peek_char() {
                        // A single special-parameter character or digit ...
                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
                            || c.is_ascii_digit()
                        {
                            Self::push_capture_char(&mut word, c);
                            self.advance();
                        } else {
                            // ... or a run of ASCII alphanumerics / underscores.
                            while let Some(c) = self.peek_char() {
                                if c.is_ascii_alphanumeric() || c == '_' {
                                    Self::push_capture_char(&mut word, c);
                                    self.advance();
                                } else {
                                    break;
                                }
                            }
                        }
                    }
                }
            } else if ch == '{' {
                if self.looks_like_mid_word_brace_segment() {
                    // Copy everything through the matching `}` (or to end of
                    // input if the braces never balance).
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                    let mut depth = 1;
                    while let Some(c) = self.peek_char() {
                        Self::push_capture_char(&mut word, c);
                        self.advance();
                        if c == '{' {
                            depth += 1;
                        } else if c == '}' {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                    }
                } else {
                    // Otherwise the brace is taken as a single literal character.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else if ch == '`' {
                // NOTE(review): `ensure_capture_from_source` presumably seeds the
                // capture buffer with the source text from `start` up to
                // `capture_end` — confirm against the helper.
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                Self::push_capture_char(&mut word, ch);
                self.advance(); let mut closed = false;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    if c == '`' {
                        closed = true;
                        break;
                    }
                    // A backslash takes the following character with it, so an
                    // escaped backtick does not close the substitution.
                    if c == '\\'
                        && let Some(next) = self.peek_char()
                    {
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                    }
                }
                if !closed {
                    return Err(LexerErrorKind::BacktickSubstitution);
                }
            } else if ch == '\\' {
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                self.advance();
                if let Some(next) = self.peek_char() {
                    if next == '\n' {
                        // Backslash-newline: both characters are consumed and
                        // nothing is added to the captured text.
                        self.advance();
                    } else {
                        // The escaped character is stored behind a NUL marker
                        // instead of the backslash itself.
                        Self::push_capture_char(&mut word, '\x00');
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                        if next == '{'
                            && self.current_word_surface_is_single_char(start, &word, '{')
                            && self.escaped_brace_sequence_looks_like_brace_expansion()
                        {
                            // Copy through the brace that balances the escaped `{`.
                            let mut depth = 1;
                            while let Some(c) = self.peek_char() {
                                Self::push_capture_char(&mut word, c);
                                self.advance();
                                match c {
                                    '{' => depth += 1,
                                    '}' => {
                                        depth -= 1;
                                        if depth == 0 {
                                            break;
                                        }
                                    }
                                    _ => {}
                                }
                            }
                        }
                    }
                } else {
                    // Trailing backslash at end of input is kept literally.
                    Self::push_capture_char(&mut word, '\\');
                }
            } else if ch == '('
                && self.current_word_surface_ends_with_char(start, &word, '=')
                && self.looks_like_assoc_assign()
            {
                // `=( ... )`: copy through the balancing `)`, stepping over
                // quoted strings and backslash escapes so parentheses inside
                // them are not counted.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '"' => {
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '"' {
                                    break;
                                }
                                if qc == '\\'
                                    && let Some(esc) = self.peek_char()
                                {
                                    Self::push_capture_char(&mut word, esc);
                                    self.advance();
                                }
                            }
                        }
                        '\'' => {
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '\'' {
                                    break;
                                }
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
            {
                // Parenthesized group after an extglob prefix: copy through the
                // balancing `)`, honouring backslash escapes only.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if Self::is_plain_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Bulk path: consume the whole run of plain characters. The
                    // ASCII length is used only when the byte right after the
                    // run is ASCII (or the input ends there).
                    let ascii_len = self.source_ascii_plain_word_len();
                    let chunk = if ascii_len > 0
                        && self
                            .cursor
                            .rest()
                            .as_bytes()
                            .get(ascii_len)
                            .is_none_or(|byte| byte.is_ascii())
                    {
                        self.consume_source_bytes(ascii_len);
                        &self.input[self.offset - ascii_len..self.offset]
                    } else {
                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                        self.advance_scanned_source_bytes(chunk.len());
                        chunk
                    };
                    Self::push_capture_str(&mut word, chunk);
                } else {
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else {
                // Anything else terminates the segment.
                break;
            }
        }

        if let Some(word) = word {
            // Owned text: the same span is passed for both span arguments.
            let span = Some(Span::from_positions(start, self.current_position()));
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::Plain,
                word,
                span,
                span,
            ))
        } else {
            // Nothing forced a capture: borrow the text straight from the input.
            let end = self.current_position();
            Ok(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ))
        }
    }
1852
1853 fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1854 let segment = match self.read_single_quoted_segment() {
1855 Ok(segment) => segment,
1856 Err(kind) => return Some(LexedToken::error(kind)),
1857 };
1858 let mut word = LexedWord::from_segment(segment);
1859 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1860 return Some(LexedToken::error(kind));
1861 }
1862
1863 Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1864 }
1865
1866 fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1867 debug_assert_eq!(self.peek_char(), Some('\''));
1868
1869 let wrapper_start = self.current_position();
1870 self.consume_ascii_chars(1); let content_start = self.current_position();
1872 let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
1873 let mut content_end = content_start;
1874 let mut content = String::with_capacity(16);
1875 let mut closed = false;
1876
1877 if can_borrow {
1878 let rest = self.cursor.rest();
1879 if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
1880 self.consume_source_bytes(quote_index);
1881 content_end = self.current_position();
1882 self.consume_ascii_chars(1); closed = true;
1884 } else {
1885 self.consume_source_bytes(rest.len());
1886 }
1887 }
1888
1889 while let Some(ch) = self.peek_char() {
1890 if closed {
1891 break;
1892 }
1893 if ch == '\'' {
1894 if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
1895 if !can_borrow {
1896 content.push('\'');
1897 }
1898 self.advance();
1899 self.advance();
1900 continue;
1901 }
1902 content_end = self.current_position();
1903 self.consume_ascii_chars(1); closed = true;
1905 break;
1906 }
1907 if !can_borrow {
1908 content.push(ch);
1909 }
1910 self.advance();
1911 }
1912
1913 if !closed {
1914 return Err(LexerErrorKind::SingleQuote);
1915 }
1916
1917 let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
1918 let content_span = Some(Span::from_positions(content_start, content_end));
1919
1920 if can_borrow {
1921 Ok(LexedWordSegment::borrowed_with_spans(
1922 LexedWordSegmentKind::SingleQuoted,
1923 &self.input[content_start.offset..content_end.offset],
1924 content_span,
1925 wrapper_span,
1926 ))
1927 } else {
1928 Ok(LexedWordSegment::owned_with_spans(
1929 LexedWordSegmentKind::SingleQuoted,
1930 content,
1931 content_span,
1932 wrapper_span,
1933 ))
1934 }
1935 }
1936
1937 fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1938 let segment = match self.read_dollar_single_quoted_segment() {
1939 Ok(segment) => segment,
1940 Err(kind) => return Some(LexedToken::error(kind)),
1941 };
1942 let mut word = LexedWord::from_segment(segment);
1943 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1944 return Some(LexedToken::error(kind));
1945 }
1946
1947 let kind = if word.single_segment().is_some() {
1948 TokenKind::LiteralWord
1949 } else {
1950 TokenKind::Word
1951 };
1952
1953 Some(LexedToken::with_word_payload(kind, word))
1954 }
1955
1956 fn read_dollar_single_quoted_segment(
1957 &mut self,
1958 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1959 debug_assert_eq!(self.peek_char(), Some('$'));
1960 debug_assert_eq!(self.second_char(), Some('\''));
1961
1962 let wrapper_start = self.current_position();
1963 self.consume_ascii_chars(2); let content_start = self.current_position();
1965 let mut out = String::with_capacity(16);
1966
1967 while let Some(ch) = self.peek_char() {
1968 if ch == '\'' {
1969 let content_end = self.current_position();
1970 self.advance();
1971 let wrapper_span =
1972 Some(Span::from_positions(wrapper_start, self.current_position()));
1973 let content_span = Some(Span::from_positions(content_start, content_end));
1974 return Ok(LexedWordSegment::owned_with_spans(
1975 LexedWordSegmentKind::DollarSingleQuoted,
1976 out,
1977 content_span,
1978 wrapper_span,
1979 ));
1980 }
1981
1982 if ch == '\\' {
1983 self.advance();
1984 if let Some(esc) = self.peek_char() {
1985 self.advance();
1986 match esc {
1987 'n' => out.push('\n'),
1988 't' => out.push('\t'),
1989 'r' => out.push('\r'),
1990 'a' => out.push('\x07'),
1991 'b' => out.push('\x08'),
1992 'f' => out.push('\x0C'),
1993 'v' => out.push('\x0B'),
1994 'e' | 'E' => out.push('\x1B'),
1995 '\\' => out.push('\\'),
1996 '\'' => out.push('\''),
1997 '"' => out.push('"'),
1998 '?' => out.push('?'),
1999 'c' => {
2000 if let Some(control) = self.peek_char() {
2001 self.advance();
2002 out.push(((control as u32 & 0x1F) as u8) as char);
2003 } else {
2004 out.push('\\');
2005 out.push('c');
2006 }
2007 }
2008 'x' => {
2009 let mut hex = String::new();
2010 for _ in 0..2 {
2011 if let Some(h) = self.peek_char() {
2012 if h.is_ascii_hexdigit() {
2013 hex.push(h);
2014 self.advance();
2015 } else {
2016 break;
2017 }
2018 }
2019 }
2020 if let Ok(val) = u8::from_str_radix(&hex, 16) {
2021 out.push(val as char);
2022 }
2023 }
2024 'u' => {
2025 let mut hex = String::new();
2026 for _ in 0..4 {
2027 if let Some(h) = self.peek_char() {
2028 if h.is_ascii_hexdigit() {
2029 hex.push(h);
2030 self.advance();
2031 } else {
2032 break;
2033 }
2034 }
2035 }
2036 if let Ok(val) = u32::from_str_radix(&hex, 16)
2037 && let Some(c) = char::from_u32(val)
2038 {
2039 out.push(c);
2040 }
2041 }
2042 'U' => {
2043 let mut hex = String::new();
2044 for _ in 0..8 {
2045 if let Some(h) = self.peek_char() {
2046 if h.is_ascii_hexdigit() {
2047 hex.push(h);
2048 self.advance();
2049 } else {
2050 break;
2051 }
2052 }
2053 }
2054 if let Ok(val) = u32::from_str_radix(&hex, 16)
2055 && let Some(c) = char::from_u32(val)
2056 {
2057 out.push(c);
2058 }
2059 }
2060 '0'..='7' => {
2061 let mut oct = String::new();
2062 oct.push(esc);
2063 for _ in 0..2 {
2064 if let Some(o) = self.peek_char() {
2065 if o.is_ascii_digit() && o < '8' {
2066 oct.push(o);
2067 self.advance();
2068 } else {
2069 break;
2070 }
2071 }
2072 }
2073 if let Ok(val) = u8::from_str_radix(&oct, 8) {
2074 out.push(val as char);
2075 }
2076 }
2077 _ => {
2078 out.push('\\');
2079 out.push(esc);
2080 }
2081 }
2082 } else {
2083 out.push('\\');
2084 }
2085 continue;
2086 }
2087
2088 out.push(ch);
2089 self.advance();
2090 }
2091
2092 Err(LexerErrorKind::SingleQuote)
2093 }
2094
2095 fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2096 let start = self.current_position();
2097
2098 if self.reinject_buf.is_empty() {
2099 let ascii_len = self.source_ascii_plain_word_len();
2100 let chunk = if ascii_len > 0
2101 && self
2102 .cursor
2103 .rest()
2104 .as_bytes()
2105 .get(ascii_len)
2106 .is_none_or(|byte| byte.is_ascii())
2107 {
2108 self.consume_source_bytes(ascii_len);
2109 &self.input[start.offset..self.offset]
2110 } else {
2111 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2112 self.advance_scanned_source_bytes(chunk.len());
2113 chunk
2114 };
2115 if chunk.is_empty() {
2116 return None;
2117 }
2118
2119 let end = self.current_position();
2120 return Some(LexedWordSegment::borrowed(
2121 LexedWordSegmentKind::Plain,
2122 &self.input[start.offset..self.offset],
2123 Some(Span::from_positions(start, end)),
2124 ));
2125 }
2126
2127 let ch = self.peek_char()?;
2128 if !Self::is_plain_word_char(ch) {
2129 return None;
2130 }
2131
2132 let mut text = String::with_capacity(16);
2133 while let Some(ch) = self.peek_char() {
2134 if !Self::is_plain_word_char(ch) {
2135 break;
2136 }
2137 text.push(ch);
2138 self.advance();
2139 }
2140
2141 Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2142 }
2143
2144 fn append_segmented_continuation(
2147 &mut self,
2148 word: &mut LexedWord<'a>,
2149 ) -> Result<(), LexerErrorKind> {
2150 loop {
2151 match self.peek_char() {
2152 Some('\'') => {
2153 word.push_segment(self.read_single_quoted_segment()?);
2154 }
2155 Some('"') => {
2156 word.push_segment(self.read_double_quoted_segment()?);
2157 }
2158 Some('$') if self.second_char() == Some('\'') => {
2159 word.push_segment(self.read_dollar_single_quoted_segment()?);
2160 }
2161 Some('$') if self.second_char() == Some('"') => {
2162 word.push_segment(self.read_dollar_double_quoted_segment()?);
2163 }
2164 Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2165 let segment = self
2166 .read_parenthesized_word_suffix_segment()
2167 .expect("peeked '(' should produce a suffix segment");
2168 word.push_segment(segment);
2169 }
2170 _ => {
2171 if let Some(segment) = self.read_plain_continuation_segment() {
2172 word.push_segment(segment);
2173 continue;
2174 }
2175
2176 let start = self.current_position();
2177 let plain = self.read_unquoted_segment(start)?;
2178 if plain.as_str().is_empty() {
2179 break;
2180 }
2181 word.push_segment(plain);
2182 }
2183 }
2184 }
2185
2186 Ok(())
2187 }
2188
2189 fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2190 debug_assert_eq!(self.peek_char(), Some('('));
2191
2192 let start = self.current_position();
2193 let mut depth = 0usize;
2194 let mut escaped = false;
2195 let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2196
2197 while let Some(ch) = self.peek_char() {
2198 if let Some(text) = text.as_mut() {
2199 text.push(ch);
2200 }
2201 self.advance();
2202
2203 if escaped {
2204 escaped = false;
2205 continue;
2206 }
2207
2208 match ch {
2209 '\\' => escaped = true,
2210 '(' => depth += 1,
2211 ')' => {
2212 depth = depth.saturating_sub(1);
2213 if depth == 0 {
2214 break;
2215 }
2216 }
2217 _ => {}
2218 }
2219 }
2220
2221 let end = self.current_position();
2222 let span = Some(Span::from_positions(start, end));
2223 if let Some(text) = text {
2224 Some(LexedWordSegment::owned_with_spans(
2225 LexedWordSegmentKind::Plain,
2226 text,
2227 span,
2228 span,
2229 ))
2230 } else {
2231 Some(LexedWordSegment::borrowed_with_spans(
2232 LexedWordSegmentKind::Plain,
2233 &self.input[start.offset..end.offset],
2234 span,
2235 span,
2236 ))
2237 }
2238 }
2239
    /// Lexes a word that starts with a `"` quote.
    ///
    /// Thin wrapper: calls `read_double_quoted_word` with `dollar = false`.
    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(false)
    }
2243
    /// Lexes a word that starts with `$"`.
    ///
    /// Thin wrapper: calls `read_double_quoted_word` with `dollar = true`.
    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(true)
    }
2247
2248 fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2249 let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2250 Ok(segment) => segment,
2251 Err(kind) => return Some(LexedToken::error(kind)),
2252 };
2253 let mut word = LexedWord::from_segment(segment);
2254 if let Err(kind) = self.append_segmented_continuation(&mut word) {
2255 return Some(LexedToken::error(kind));
2256 }
2257
2258 let kind = if word.single_segment().is_some() {
2259 TokenKind::QuotedWord
2260 } else {
2261 TokenKind::Word
2262 };
2263
2264 Some(LexedToken::with_word_payload(kind, word))
2265 }
2266
    /// Reads one `"..."` segment.
    ///
    /// Thin wrapper: calls `read_double_quoted_segment_with_dollar` with
    /// `dollar = false` and returns its result unchanged.
    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(false)
    }
2270
    /// Reads one `$"..."` segment.
    ///
    /// Thin wrapper: calls `read_double_quoted_segment_with_dollar` with
    /// `dollar = true` and returns its result unchanged.
    fn read_dollar_double_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(true)
    }
2276
    /// Reads one double-quoted segment: `"..."`, or `$"..."` when `dollar` is
    /// true.
    ///
    /// Three pieces of state drive the loop:
    ///
    /// * `simple` — true until the first `\`, `$` or backtick is met. While it
    ///   holds and no reinjected text is pending, scanning jumps straight to the
    ///   next position reported by `find_double_quote_special`.
    /// * `borrowable` — whether the result may still be returned as a slice of
    ///   the input. It is cleared by backticks, by the escapes `\`-newline,
    ///   `\$`, `\"`, `\\` and `` \` ``, and when `read_param_expansion_into`
    ///   returns `false`.
    /// * `content` — the owned capture buffer, used for the result when
    ///   `borrowable` is false.
    ///
    /// The content span ends at `content_end`; the wrapper span covers the
    /// opening delimiter through the closing quote.
    ///
    /// # Errors
    ///
    /// Returns `LexerErrorKind::DoubleQuote` when the input ends before the
    /// closing quote.
    fn read_double_quoted_segment_with_dollar(
        &mut self,
        dollar: bool,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        if dollar {
            debug_assert_eq!(self.peek_char(), Some('$'));
            debug_assert_eq!(self.second_char(), Some('"'));
        } else {
            debug_assert_eq!(self.peek_char(), Some('"'));
        }

        let wrapper_start = self.current_position();
        // Consume the opening delimiter: `$"` is two characters, `"` is one.
        if dollar {
            self.consume_ascii_chars(2); } else {
            self.consume_ascii_chars(1); }
        let content_start = self.current_position();
        let mut content_end = content_start;
        let mut simple = self.reinject_buf.is_empty();
        let mut borrowable = self.reinject_buf.is_empty();
        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        let mut closed = false;

        while let Some(ch) = self.peek_char() {
            if simple {
                if self.reinject_buf.is_empty() {
                    let rest = self.cursor.rest();
                    match Self::find_double_quote_special(rest) {
                        // Skip everything in front of the next special character.
                        Some(index) if index > 0 => {
                            self.consume_source_bytes(index);
                            continue;
                        }
                        // No special character left, so no closing quote either.
                        None => {
                            self.consume_source_bytes(rest.len());
                            return Err(LexerErrorKind::DoubleQuote);
                        }
                        _ => {}
                    }
                }

                match ch {
                    '"' => {
                        content_end = self.current_position();
                        self.consume_ascii_chars(1); closed = true;
                        break;
                    }
                    '\\' | '$' | '`' => {
                        // Leave simple mode without consuming `ch`; the general
                        // match below handles it in this same iteration.
                        simple = false;
                        if ch == '`' {
                            borrowable = false;
                            let capture_end = self.current_position();
                            self.ensure_capture_from_source(
                                &mut content,
                                content_start,
                                capture_end,
                            );
                        }
                    }
                    _ => {
                        self.advance();
                    }
                }
                if simple {
                    continue;
                }
            }

            match ch {
                '"' => {
                    // On the owned path `content_end` keeps the value recorded
                    // by the most recent branch below.
                    if borrowable {
                        content_end = self.current_position();
                    }
                    self.consume_ascii_chars(1); closed = true;
                    break;
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        match next {
                            '\n' => {
                                // Backslash-newline: both characters are
                                // consumed and nothing is captured.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                self.advance();
                            }
                            '$' => {
                                // Escaped dollar is captured behind a NUL marker.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                Self::push_capture_char(&mut content, '\x00');
                                Self::push_capture_char(&mut content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // The backslash is dropped; `\\` and `` \` ``
                                // additionally get a NUL marker, `\"` does not.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                if next == '\\' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                if next == '`' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                            _ => {
                                // Any other escape keeps both characters, so the
                                // text can still be borrowed.
                                Self::push_capture_char(&mut content, '\\');
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                        }
                    }
                }
                '$' => {
                    Self::push_capture_char(&mut content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        // The helpers' boolean results are not checked here.
                        if self.second_char() == Some('(') {
                            self.read_arithmetic_expansion_into(&mut content);
                        } else {
                            Self::push_capture_char(&mut content, '(');
                            self.advance();
                            self.read_command_subst_into(&mut content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(&mut content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
                    }
                    content_end = self.current_position();
                }
                '`' => {
                    borrowable = false;
                    let capture_end = self.current_position();
                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
                    Self::push_capture_char(&mut content, '`');
                    // Copy through the closing backtick; a backslash takes the
                    // following character with it.
                    self.advance(); while let Some(c) = self.peek_char() {
                        Self::push_capture_char(&mut content, c);
                        self.advance();
                        if c == '`' {
                            break;
                        }
                        if c == '\\'
                            && let Some(next) = self.peek_char()
                        {
                            Self::push_capture_char(&mut content, next);
                            self.advance();
                        }
                    }
                    content_end = self.current_position();
                }
                _ => {
                    Self::push_capture_char(&mut content, ch);
                    self.advance();
                    content_end = self.current_position();
                }
            }
        }

        if !closed {
            return Err(LexerErrorKind::DoubleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if borrowable {
            Ok(LexedWordSegment::borrowed_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                content.unwrap_or_default(),
                content_span,
                wrapper_span,
            ))
        }
    }
2484
2485 fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2486 debug_assert_eq!(self.peek_char(), Some('('));
2487 debug_assert_eq!(self.second_char(), Some('('));
2488
2489 Self::push_capture_char(content, '(');
2490 self.advance();
2491 Self::push_capture_char(content, '(');
2492 self.advance();
2493
2494 let mut depth = 2;
2495 while let Some(c) = self.peek_char() {
2496 match c {
2497 '\\' => {
2498 Self::push_capture_char(content, c);
2499 self.advance();
2500 if let Some(next) = self.peek_char() {
2501 Self::push_capture_char(content, next);
2502 self.advance();
2503 }
2504 }
2505 '\'' => {
2506 Self::push_capture_char(content, c);
2507 self.advance();
2508 while let Some(quoted) = self.peek_char() {
2509 Self::push_capture_char(content, quoted);
2510 self.advance();
2511 if quoted == '\'' {
2512 break;
2513 }
2514 }
2515 }
2516 '"' => {
2517 let mut escaped = false;
2518 Self::push_capture_char(content, c);
2519 self.advance();
2520 while let Some(quoted) = self.peek_char() {
2521 Self::push_capture_char(content, quoted);
2522 self.advance();
2523 if escaped {
2524 escaped = false;
2525 continue;
2526 }
2527 match quoted {
2528 '\\' => escaped = true,
2529 '"' => break,
2530 _ => {}
2531 }
2532 }
2533 }
2534 '`' => {
2535 let mut escaped = false;
2536 Self::push_capture_char(content, c);
2537 self.advance();
2538 while let Some(quoted) = self.peek_char() {
2539 Self::push_capture_char(content, quoted);
2540 self.advance();
2541 if escaped {
2542 escaped = false;
2543 continue;
2544 }
2545 match quoted {
2546 '\\' => escaped = true,
2547 '`' => break,
2548 _ => {}
2549 }
2550 }
2551 }
2552 '(' => {
2553 Self::push_capture_char(content, c);
2554 self.advance();
2555 depth += 1;
2556 }
2557 ')' => {
2558 Self::push_capture_char(content, c);
2559 self.advance();
2560 depth -= 1;
2561 if depth == 0 {
2562 return true;
2563 }
2564 }
2565 _ => {
2566 Self::push_capture_char(content, c);
2567 self.advance();
2568 }
2569 }
2570 }
2571
2572 false
2573 }
2574
2575 fn read_legacy_arithmetic_into(
2576 &mut self,
2577 content: &mut Option<String>,
2578 segment_start: Position,
2579 ) -> bool {
2580 let mut bracket_depth = 1;
2581
2582 while let Some(c) = self.peek_char() {
2583 match c {
2584 '\\' => {
2585 Self::push_capture_char(content, c);
2586 self.advance();
2587 if let Some(next) = self.peek_char() {
2588 Self::push_capture_char(content, next);
2589 self.advance();
2590 }
2591 }
2592 '\'' => {
2593 Self::push_capture_char(content, c);
2594 self.advance();
2595 while let Some(quoted) = self.peek_char() {
2596 Self::push_capture_char(content, quoted);
2597 self.advance();
2598 if quoted == '\'' {
2599 break;
2600 }
2601 }
2602 }
2603 '"' => {
2604 let mut escaped = false;
2605 Self::push_capture_char(content, c);
2606 self.advance();
2607 while let Some(quoted) = self.peek_char() {
2608 Self::push_capture_char(content, quoted);
2609 self.advance();
2610 if escaped {
2611 escaped = false;
2612 continue;
2613 }
2614 match quoted {
2615 '\\' => escaped = true,
2616 '"' => break,
2617 _ => {}
2618 }
2619 }
2620 }
2621 '`' => {
2622 let mut escaped = false;
2623 Self::push_capture_char(content, c);
2624 self.advance();
2625 while let Some(quoted) = self.peek_char() {
2626 Self::push_capture_char(content, quoted);
2627 self.advance();
2628 if escaped {
2629 escaped = false;
2630 continue;
2631 }
2632 match quoted {
2633 '\\' => escaped = true,
2634 '`' => break,
2635 _ => {}
2636 }
2637 }
2638 }
2639 '[' => {
2640 Self::push_capture_char(content, c);
2641 self.advance();
2642 bracket_depth += 1;
2643 }
2644 ']' => {
2645 Self::push_capture_char(content, c);
2646 self.advance();
2647 bracket_depth -= 1;
2648 if bracket_depth == 0 {
2649 return true;
2650 }
2651 }
2652 '$' => {
2653 Self::push_capture_char(content, c);
2654 self.advance();
2655 if self.peek_char() == Some('(') {
2656 if self.second_char() == Some('(') {
2657 if !self.read_arithmetic_expansion_into(content) {
2658 return false;
2659 }
2660 } else {
2661 Self::push_capture_char(content, '(');
2662 self.advance();
2663 if !self.read_command_subst_into(content) {
2664 return false;
2665 }
2666 }
2667 } else if self.peek_char() == Some('{') {
2668 Self::push_capture_char(content, '{');
2669 self.advance();
2670 if !self.read_param_expansion_into(content, segment_start) {
2671 return false;
2672 }
2673 } else if self.peek_char() == Some('[') {
2674 Self::push_capture_char(content, '[');
2675 self.advance();
2676 if !self.read_legacy_arithmetic_into(content, segment_start) {
2677 return false;
2678 }
2679 }
2680 }
2681 _ => {
2682 Self::push_capture_char(content, c);
2683 self.advance();
2684 }
2685 }
2686 }
2687
2688 false
2689 }
2690
2691 fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
2695 self.read_command_subst_into_depth(content, 0)
2696 }
2697
2698 fn flush_command_subst_keyword(
2699 current_word: &mut String,
2700 pending_case_headers: &mut usize,
2701 case_clause_depths: &mut SmallVec<[usize; 4]>,
2702 depth: usize,
2703 word_started_at_command_start: &mut bool,
2704 ) {
2705 if current_word.is_empty() {
2706 *word_started_at_command_start = false;
2707 return;
2708 }
2709
2710 match current_word.as_str() {
2711 "case" if *word_started_at_command_start => *pending_case_headers += 1,
2712 "in" if *pending_case_headers > 0 => {
2713 *pending_case_headers -= 1;
2714 case_clause_depths.push(depth);
2715 }
2716 "esac" if *word_started_at_command_start => {
2717 case_clause_depths.pop();
2718 }
2719 _ => {}
2720 }
2721
2722 current_word.clear();
2723 *word_started_at_command_start = false;
2724 }
2725
2726 fn read_command_subst_heredoc_delimiter_into(
2727 &mut self,
2728 content: &mut Option<String>,
2729 ) -> Option<String> {
2730 while let Some(ch) = self.peek_char() {
2731 if !matches!(ch, ' ' | '\t') {
2732 break;
2733 }
2734 Self::push_capture_char(content, ch);
2735 self.advance();
2736 }
2737
2738 let mut cooked = String::new();
2739 let mut in_single = false;
2740 let mut in_double = false;
2741 let mut escaped = false;
2742 let mut saw_any = false;
2743
2744 while let Some(ch) = self.peek_char() {
2745 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2746 break;
2747 }
2748
2749 saw_any = true;
2750 Self::push_capture_char(content, ch);
2751 self.advance();
2752
2753 if escaped {
2754 cooked.push(ch);
2755 escaped = false;
2756 continue;
2757 }
2758
2759 match ch {
2760 '\\' if !in_single => escaped = true,
2761 '\'' if !in_double => in_single = !in_single,
2762 '"' if !in_single => in_double = !in_double,
2763 _ => cooked.push(ch),
2764 }
2765 }
2766
2767 saw_any.then_some(cooked)
2768 }
2769
2770 fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2771 Self::push_capture_char(content, '`');
2772 self.advance();
2773 while let Some(ch) = self.peek_char() {
2774 Self::push_capture_char(content, ch);
2775 self.advance();
2776 if ch == '\\' {
2777 if let Some(esc) = self.peek_char() {
2778 Self::push_capture_char(content, esc);
2779 self.advance();
2780 }
2781 continue;
2782 }
2783 if ch == '`' {
2784 break;
2785 }
2786 }
2787 }
2788
2789 fn read_command_subst_pending_heredoc_into(
2790 &mut self,
2791 content: &mut Option<String>,
2792 delimiter: &str,
2793 strip_tabs: bool,
2794 ) -> bool {
2795 loop {
2796 let mut line = String::new();
2797 let mut saw_newline = false;
2798
2799 while let Some(ch) = self.peek_char() {
2800 self.advance();
2801 if ch == '\n' {
2802 saw_newline = true;
2803 break;
2804 }
2805 line.push(ch);
2806 }
2807
2808 Self::push_capture_str(content, &line);
2809 if saw_newline {
2810 Self::push_capture_char(content, '\n');
2811 }
2812
2813 if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2814 return true;
2815 }
2816 }
2817 }
2818
    /// Consumes a command-substitution body up to and including the `)` that
    /// balances the already-consumed opening parenthesis, handing consumed
    /// characters to the capture helpers for `content`.
    ///
    /// `subst_depth` counts how many command substitutions enclose this one.
    /// Once it reaches `self.max_subst_depth` the body is only skimmed (see
    /// the first branch). Otherwise the body is scanned with a small state
    /// machine that tracks:
    ///
    /// * parenthesis nesting (`depth`),
    /// * `case … in … esac` structure, so that a `)` ending a case pattern is
    ///   not counted as closing a parenthesis,
    /// * heredoc delimiters announced by `<<` / `<<-`, whose bodies are
    ///   consumed after the next newline,
    /// * whether the scanner sits at the start of a command and whether the
    ///   next word is a redirection target, both of which influence keyword
    ///   recognition.
    ///
    /// Returns `true` when the closing `)` was found, and `false` when input
    /// ended first or a nested reader reported failure.
    fn read_command_subst_into_depth(
        &mut self,
        content: &mut Option<String>,
        subst_depth: usize,
    ) -> bool {
        // Depth limit reached: only balance raw parentheses. Characters are
        // consumed without being captured; the final `)` is the only one
        // pushed to `content`. Quotes and comments get no special treatment.
        if subst_depth >= self.max_subst_depth {
            let mut depth = 1;
            while let Some(c) = self.peek_char() {
                self.advance();
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            Self::push_capture_char(content, ')');
                            return true;
                        }
                    }
                    _ => {}
                }
            }
            return false;
        }

        // Parenthesis nesting; 1 accounts for the opening `(` the caller consumed.
        let mut depth = 1;
        // Heredocs announced on the current line as (delimiter, strip_tabs).
        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
        // Number of `case` keywords still waiting for their `in`.
        let mut pending_case_headers = 0usize;
        // Parenthesis depth recorded at each `in` of a still-open case clause.
        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
        // Alphanumeric/underscore word being accumulated for keyword detection.
        let mut current_word = String::with_capacity(16);
        let mut at_command_start = true;
        let mut expecting_redirection_target = false;
        let mut current_word_started_at_command_start = false;
        while let Some(c) = self.peek_char() {
            match c {
                // Comment: capture through the end of the line. The newline
                // also triggers consumption of any pending heredoc bodies.
                '#' if !self.should_treat_hash_as_word_char() => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '#');
                    self.advance();
                    while let Some(comment_ch) = self.peek_char() {
                        Self::push_capture_char(content, comment_ch);
                        self.advance();
                        if comment_ch == '\n' {
                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                                if !self.read_command_subst_pending_heredoc_into(
                                    content, &delimiter, strip_tabs,
                                ) {
                                    return false;
                                }
                            }
                            at_command_start = true;
                            expecting_redirection_target = false;
                            break;
                        }
                    }
                }
                // Opening parenthesis: one level deeper, and a new command starts.
                '(' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    depth += 1;
                    Self::push_capture_char(content, c);
                    self.advance();
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                ')' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    // Inside an open case clause at this depth the `)` ends a
                    // pattern, so the nesting count is left untouched.
                    if case_clause_depths
                        .last()
                        .is_some_and(|case_depth| *case_depth == depth)
                    {
                        Self::push_capture_char(content, ')');
                        self.advance();
                        at_command_start = true;
                        expecting_redirection_target = false;
                        continue;
                    }
                    depth -= 1;
                    self.advance();
                    if depth == 0 {
                        // This `)` closes the substitution itself.
                        Self::push_capture_char(content, ')');
                        return true;
                    }
                    Self::push_capture_char(content, c);
                    at_command_start = false;
                    expecting_redirection_target = false;
                }
                // Double-quoted string: consumed as a unit. `$(` and `$((`
                // inside it are delegated to the nested readers.
                '"' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '"');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        match qc {
                            '"' => {
                                Self::push_capture_char(content, '"');
                                self.advance();
                                break;
                            }
                            '\\' => {
                                Self::push_capture_char(content, '\\');
                                self.advance();
                                if let Some(esc) = self.peek_char() {
                                    Self::push_capture_char(content, esc);
                                    self.advance();
                                }
                            }
                            '$' => {
                                Self::push_capture_char(content, '$');
                                self.advance();
                                if self.peek_char() == Some('(') {
                                    if self.second_char() == Some('(') {
                                        if !self.read_arithmetic_expansion_into(content) {
                                            return false;
                                        }
                                    } else {
                                        Self::push_capture_char(content, '(');
                                        self.advance();
                                        // Nested substitution: one level deeper.
                                        if !self
                                            .read_command_subst_into_depth(content, subst_depth + 1)
                                        {
                                            return false;
                                        }
                                    }
                                }
                            }
                            _ => {
                                Self::push_capture_char(content, qc);
                                self.advance();
                            }
                        }
                    }
                    // A quoted word either satisfies a pending redirection
                    // target or counts as command text.
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Single-quoted string: no escapes, runs to the next `'`.
                '\'' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backtick run: delegated to the dedicated helper.
                '`' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    self.read_command_subst_backtick_segment_into(content);
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `$'...'` string: like single quotes, but a backslash takes
                // the following character with it.
                '$' if self.second_char() == Some('\'') => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '$');
                    self.advance();
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\\' {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            continue;
                        }
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backslash escape outside quotes: the next character is
                // captured verbatim and never interpreted.
                '\\' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\\');
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        Self::push_capture_char(content, esc);
                        self.advance();
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `<<`, `<<-` or `<<<`.
                '<' if self.second_char() == Some('<') => {
                    // An all-digit word at command start right before the
                    // operator is treated as a file-descriptor prefix, so the
                    // command-start state is restored after flushing it.
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }

                    Self::push_capture_char(content, '<');
                    self.advance();
                    Self::push_capture_char(content, '<');
                    self.advance();

                    // A third `<` (`<<<`) does not queue a heredoc body; the
                    // next word is simply a redirection target.
                    if self.peek_char() == Some('<') {
                        Self::push_capture_char(content, '<');
                        self.advance();
                        expecting_redirection_target = true;
                        continue;
                    }

                    let strip_tabs = if self.peek_char() == Some('-') {
                        Self::push_capture_char(content, '-');
                        self.advance();
                        true
                    } else {
                        false
                    };

                    // Queue the delimiter; its body is consumed after the next
                    // newline. With no delimiter text, fall back to expecting
                    // a redirection target.
                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
                    {
                        pending_heredocs.push((delimiter, strip_tabs));
                        expecting_redirection_target = false;
                    } else {
                        expecting_redirection_target = true;
                    }
                }
                // Any other redirection operator character.
                '>' | '<' => {
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                    expecting_redirection_target = true;
                }
                // Newline: consume queued heredoc bodies, then a new command starts.
                '\n' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    Self::push_capture_char(content, '\n');
                    self.advance();
                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                        if !self.read_command_subst_pending_heredoc_into(
                            content, &delimiter, strip_tabs,
                        ) {
                            return false;
                        }
                    }
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                _ => {
                    if c.is_ascii_alphanumeric() || c == '_' {
                        // First character of a word: remember whether the word
                        // began at command start, which is what qualifies it
                        // as a `case` / `esac` keyword when it is flushed.
                        if current_word.is_empty()
                            && !expecting_redirection_target
                            && at_command_start
                        {
                            current_word_started_at_command_start = true;
                            at_command_start = false;
                        }
                        current_word.push(c);
                    } else {
                        let had_word = !current_word.is_empty();
                        Self::flush_command_subst_keyword(
                            &mut current_word,
                            &mut pending_case_headers,
                            &mut case_clause_depths,
                            depth,
                            &mut current_word_started_at_command_start,
                        );
                        if had_word && expecting_redirection_target {
                            expecting_redirection_target = false;
                        }
                        match c {
                            // Blanks separate words without changing state.
                            ' ' | '\t' => {}
                            // Command separators start a new command.
                            ';' | '|' | '&' => {
                                at_command_start = true;
                                expecting_redirection_target = false;
                            }
                            _ => {
                                if !expecting_redirection_target {
                                    at_command_start = false;
                                }
                            }
                        }
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // Input ended before the closing `)`.
        false
    }
3219
    /// Consumes a `${...}` body (the caller has already consumed `${`) up to
    /// and including the closing `}`, handing consumed characters to the
    /// capture helpers for `content`.
    ///
    /// The return value starts as `true` and is cleared whenever the captured
    /// text is made to differ from the source (an escape is rewritten), when a
    /// nested `$((` reader returns `false`, or when a nested `${` reader
    /// returns `false`. The result of a nested `$(` reader is not inspected.
    ///
    /// `segment_start` is passed to `ensure_capture_from_source` together with
    /// the position of a rewritten escape, and is forwarded to nested
    /// parameter-expansion reads.
    // NOTE(review): `ensure_capture_from_source` presumably seeds `content`
    // with the source text between the two positions before the capture
    // starts to diverge — confirm against its definition.
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut borrowable = true;
        // Nesting of the expansion itself; 1 is the `${` the caller consumed.
        let mut depth = 1;
        // Count of literal `{` seen outside quotes that may pair with a later `}`.
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Value of `depth` when the current double quote was opened (0 outside).
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            if in_single {
                match c {
                    '\\' => {
                        let escape_start = self.current_position();
                        if self.second_char() == Some('"') {
                            // `\"` inside single quotes: the backslash is
                            // consumed but not captured, so the capture no
                            // longer matches the source.
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                // A `}` counts unless it sits inside a double quote that was
                // opened at the current nesting level or deeper.
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    // At the outermost level, a `}` that can pair with an
                    // earlier literal `{` is treated as literal, provided the
                    // lookahead finds another closer for the expansion.
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                '{' if !in_single && !in_double => {
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    // Inside double quotes a `'` is an ordinary character.
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                // `\$` is captured as a NUL byte followed by
                                // `$` instead of the backslash.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // The backslash is dropped; only the escaped
                                // character is captured.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // `\}` is captured unchanged and does not
                                // close anything, but it does use up one
                                // pending literal `{`.
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at end of input.
                        Self::push_capture_char(content, '\\');
                    }
                }
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            // The result of the nested read is not checked here.
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3370
3371 fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
3372 let mut chars = self.lookahead_chars().peekable();
3373 let mut depth = target_depth;
3374 let mut in_single = false;
3375 let mut in_double = false;
3376 let mut double_quote_depth = 0usize;
3377
3378 while let Some(ch) = chars.next() {
3379 if in_single {
3380 match ch {
3381 '\'' => in_single = false,
3382 '\\' if chars.peek() == Some(&'"') => {
3383 chars.next();
3384 }
3385 '\\' => {}
3386 _ => {}
3387 }
3388 continue;
3389 }
3390
3391 if in_double {
3392 match ch {
3393 '"' => {
3394 in_double = false;
3395 double_quote_depth = 0;
3396 }
3397 '\\' => {
3398 chars.next();
3399 }
3400 '$' if chars.peek() == Some(&'{') => {
3401 chars.next();
3402 depth += 1;
3403 }
3404 '}' if depth > double_quote_depth => {
3405 depth -= 1;
3406 }
3407 _ => {}
3408 }
3409 continue;
3410 }
3411
3412 match ch {
3413 '\n' if depth == target_depth => return false,
3414 '\'' => in_single = true,
3415 '"' => {
3416 in_double = true;
3417 double_quote_depth = depth;
3418 }
3419 '\\' => {
3420 chars.next();
3421 }
3422 '$' if chars.peek() == Some(&'{') => {
3423 chars.next();
3424 depth += 1;
3425 }
3426 '}' => {
3427 if depth == target_depth {
3428 return true;
3429 }
3430 depth -= 1;
3431 }
3432 _ => {}
3433 }
3434 }
3435
3436 false
3437 }
3438
3439 fn looks_like_brace_expansion(&self) -> bool {
3445 const MAX_LOOKAHEAD: usize = 10_000;
3446
3447 let mut chars = self.lookahead_chars();
3448
3449 if chars.next() != Some('{') {
3451 return false;
3452 }
3453
3454 let mut depth = 1;
3455 let mut has_comma = false;
3456 let mut has_dot_dot = false;
3457 let mut prev_char = None;
3458 let mut scanned = 0usize;
3459
3460 for ch in chars {
3461 scanned += 1;
3462 if scanned > MAX_LOOKAHEAD {
3463 return false;
3464 }
3465 match ch {
3466 '{' => depth += 1,
3467 '}' => {
3468 depth -= 1;
3469 if depth == 0 {
3470 return has_comma || has_dot_dot;
3472 }
3473 }
3474 ',' if depth == 1 => has_comma = true,
3475 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3476 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3478 _ => {}
3479 }
3480 prev_char = Some(ch);
3481 }
3482
3483 false
3484 }
3485
3486 fn looks_like_mid_word_brace_segment(&self) -> bool {
3489 const MAX_LOOKAHEAD: usize = 10_000;
3490
3491 let mut chars = self.lookahead_chars();
3492 if chars.next() != Some('{') {
3493 return false;
3494 }
3495
3496 let mut brace_depth = 1;
3497 let mut paren_depth = 0usize;
3498 let mut escaped = false;
3499 let mut in_single = false;
3500 let mut in_double = false;
3501 let mut in_backtick = false;
3502 let mut prev_char = None;
3503 let mut scanned = 0usize;
3504
3505 for ch in chars {
3506 scanned += 1;
3507 if scanned > MAX_LOOKAHEAD {
3508 return false;
3509 }
3510
3511 if !in_single
3512 && !in_double
3513 && !in_backtick
3514 && !escaped
3515 && brace_depth == 1
3516 && paren_depth == 0
3517 && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3518 {
3519 return false;
3520 }
3521
3522 if escaped {
3523 escaped = false;
3524 prev_char = Some(ch);
3525 continue;
3526 }
3527
3528 match ch {
3529 '\\' => escaped = true,
3530 '\'' if !in_double && !in_backtick => in_single = !in_single,
3531 '"' if !in_single && !in_backtick => in_double = !in_double,
3532 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3533 '(' if !in_single
3534 && !in_double
3535 && !in_backtick
3536 && (paren_depth > 0 || prev_char == Some('$')) =>
3537 {
3538 paren_depth += 1
3539 }
3540 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3541 paren_depth -= 1
3542 }
3543 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3544 '}' => {
3545 brace_depth -= 1;
3546 if brace_depth == 0 {
3547 return true;
3548 }
3549 }
3550 _ => {}
3551 }
3552
3553 prev_char = Some(ch);
3554 }
3555
3556 false
3557 }
3558
3559 fn is_brace_group_start(&self) -> bool {
3561 let mut chars = self.lookahead_chars();
3562 if chars.next() != Some('{') {
3564 return false;
3565 }
3566 matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3568 }
3569
3570 fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3573 const MAX_LOOKAHEAD: usize = 10_000;
3574
3575 let mut chars = self.lookahead_chars();
3576 let mut depth = 1;
3577 let mut has_comma = false;
3578 let mut has_dot_dot = false;
3579 let mut prev_char = None;
3580 let mut scanned = 0usize;
3581
3582 for ch in chars.by_ref() {
3583 scanned += 1;
3584 if scanned > MAX_LOOKAHEAD {
3585 return false;
3586 }
3587 match ch {
3588 '{' => depth += 1,
3589 '}' => {
3590 depth -= 1;
3591 if depth == 0 {
3592 return has_comma || has_dot_dot;
3593 }
3594 }
3595 ',' if depth == 1 => has_comma = true,
3596 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3597 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3598 _ => {}
3599 }
3600 prev_char = Some(ch);
3601 }
3602
3603 false
3604 }
3605
3606 fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3608 let mut word = String::with_capacity(16);
3609
3610 if let Some('{') = self.peek_char() {
3612 word.push('{');
3613 self.advance();
3614 } else {
3615 return None;
3616 }
3617
3618 let mut depth = 1;
3620 while let Some(ch) = self.peek_char() {
3621 word.push(ch);
3622 self.advance();
3623 match ch {
3624 '{' => depth += 1,
3625 '}' => {
3626 depth -= 1;
3627 if depth == 0 {
3628 break;
3629 }
3630 }
3631 _ => {}
3632 }
3633 }
3634
3635 while let Some(ch) = self.peek_char() {
3637 if Self::is_word_char(ch) {
3638 if self.reinject_buf.is_empty() {
3639 let chunk = self.cursor.eat_while(Self::is_word_char);
3640 word.push_str(chunk);
3641 self.advance_scanned_source_bytes(chunk.len());
3642 } else {
3643 word.push(ch);
3644 self.advance();
3645 }
3646 } else {
3647 break;
3648 }
3649 }
3650
3651 Some(LexedToken::owned_word(TokenKind::Word, word))
3652 }
3653
3654 fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3656 let mut word = String::with_capacity(16);
3657
3658 if let Some('{') = self.peek_char() {
3660 word.push('{');
3661 self.advance();
3662 } else {
3663 return None;
3664 }
3665
3666 let mut depth = 1;
3668 while let Some(ch) = self.peek_char() {
3669 word.push(ch);
3670 self.advance();
3671 match ch {
3672 '{' => depth += 1,
3673 '}' => {
3674 depth -= 1;
3675 if depth == 0 {
3676 break;
3677 }
3678 }
3679 _ => {}
3680 }
3681 }
3682
3683 while let Some(ch) = self.peek_char() {
3685 if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3686 if ch == '{' {
3687 word.push(ch);
3689 self.advance();
3690 let mut inner_depth = 1;
3691 while let Some(c) = self.peek_char() {
3692 word.push(c);
3693 self.advance();
3694 match c {
3695 '{' => inner_depth += 1,
3696 '}' => {
3697 inner_depth -= 1;
3698 if inner_depth == 0 {
3699 break;
3700 }
3701 }
3702 _ => {}
3703 }
3704 }
3705 } else {
3706 word.push(ch);
3707 self.advance();
3708 }
3709 } else {
3710 break;
3711 }
3712 }
3713
3714 Some(LexedToken::owned_word(TokenKind::Word, word))
3715 }
3716
3717 fn looks_like_assoc_assign(&self) -> bool {
3721 let mut chars = self.lookahead_chars();
3722 if chars.next() != Some('(') {
3724 return false;
3725 }
3726 for ch in chars {
3728 match ch {
3729 ' ' | '\t' => continue,
3730 '[' => return true,
3731 _ => return false,
3732 }
3733 }
3734 false
3735 }
3736
3737 fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3738 text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3739 }
3740
3741 fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3742 word.segments().any(|segment| {
3743 matches!(
3744 segment.kind(),
3745 LexedWordSegmentKind::SingleQuoted
3746 | LexedWordSegmentKind::DollarSingleQuoted
3747 | LexedWordSegmentKind::DoubleQuoted
3748 | LexedWordSegmentKind::DollarDoubleQuoted
3749 )
3750 }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3751 }
3752
3753 fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3754 text.contains(['*', '?'])
3755 || text.ends_with('}') && text.contains("${")
3756 || text.ends_with(']')
3757 && text
3758 .rfind('[')
3759 .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3760 }
3761
3762 fn is_word_char(ch: char) -> bool {
3763 !matches!(
3764 ch,
3765 ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3766 )
3767 }
3768
3769 const fn is_ascii_word_byte(byte: u8) -> bool {
3770 !matches!(
3771 byte,
3772 b' ' | b'\t'
3773 | b'\n'
3774 | b';'
3775 | b'|'
3776 | b'&'
3777 | b'>'
3778 | b'<'
3779 | b'('
3780 | b')'
3781 | b'{'
3782 | b'}'
3783 | b'\''
3784 | b'"'
3785 )
3786 }
3787
3788 const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3789 Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3790 }
3791
3792 fn is_plain_word_char(ch: char) -> bool {
3793 Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3794 }
3795
3796 pub fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3798 let mut content = String::with_capacity(64);
3799 let mut current_line = String::with_capacity(64);
3800
3801 let mut rest_of_line = String::with_capacity(32);
3808 let rest_of_line_start = self.current_position();
3809 let mut in_double_quote = false;
3810 let mut in_single_quote = false;
3811 let mut in_comment = false;
3812 let mut saw_non_whitespace_tail = false;
3813 let mut consecutive_backslashes = 0usize;
3814 let mut previous_tail_char = None;
3815 while let Some(ch) = self.peek_char() {
3816 self.advance();
3817 if in_comment {
3818 if ch == '\n' {
3819 break;
3820 }
3821 rest_of_line.push(ch);
3822 previous_tail_char = Some(ch);
3823 continue;
3824 }
3825 if ch == '#'
3826 && !in_single_quote
3827 && !in_double_quote
3828 && self.comments_enabled()
3829 && heredoc_tail_hash_starts_comment(previous_tail_char)
3830 {
3831 in_comment = true;
3832 rest_of_line.push(ch);
3833 previous_tail_char = Some(ch);
3834 consecutive_backslashes = 0;
3835 continue;
3836 }
3837 let backslash_continues_line = ch == '\\'
3838 && !in_single_quote
3839 && self.peek_char() == Some('\n')
3840 && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3841 && consecutive_backslashes.is_multiple_of(2);
3842 if backslash_continues_line {
3843 rest_of_line.push(ch);
3844 rest_of_line.push('\n');
3845 self.advance();
3846 consecutive_backslashes = 0;
3847 continue;
3848 }
3849 if ch == '\n' && !in_double_quote && !in_single_quote {
3850 break;
3851 }
3852 if ch == '"' && !in_single_quote {
3853 in_double_quote = !in_double_quote;
3854 } else if ch == '\'' && !in_double_quote {
3855 in_single_quote = !in_single_quote;
3856 } else if ch == '\\' && in_double_quote {
3857 rest_of_line.push(ch);
3859 if let Some(next) = self.peek_char() {
3860 rest_of_line.push(next);
3861 self.advance();
3862 }
3863 continue;
3864 }
3865 rest_of_line.push(ch);
3866 if !ch.is_whitespace() {
3867 saw_non_whitespace_tail = true;
3868 }
3869 if ch == '\\' && !in_single_quote {
3870 consecutive_backslashes += 1;
3871 } else {
3872 consecutive_backslashes = 0;
3873 }
3874 previous_tail_char = Some(ch);
3875 }
3876
3877 self.sync_offset_to_cursor();
3881 let content_start = self.current_position();
3882 let mut current_line_start = content_start;
3883 let content_end;
3884
3885 loop {
3887 if self.reinject_buf.is_empty() {
3888 self.sync_offset_to_cursor();
3894 let rest = self.cursor.rest();
3895 if rest.is_empty() {
3896 content_end = self.current_position();
3897 break;
3898 }
3899
3900 let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3901 let line = &rest[..line_len];
3902 let has_newline = line_len < rest.len();
3903
3904 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3905 content_end = current_line_start;
3906 self.consume_source_bytes(line_len);
3907 if has_newline {
3908 self.consume_ascii_chars(1);
3909 }
3910 break;
3911 }
3912
3913 content.push_str(line);
3914 self.consume_source_bytes(line_len);
3915
3916 if has_newline {
3917 self.consume_ascii_chars(1);
3918 content.push('\n');
3919 current_line_start = self.current_position();
3920 continue;
3921 }
3922
3923 content_end = self.current_position();
3924 break;
3925 }
3926
3927 match self.peek_char() {
3928 Some('\n') => {
3929 self.advance();
3930 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
3932 content_end = current_line_start;
3933 break;
3934 }
3935 content.push_str(¤t_line);
3936 content.push('\n');
3937 current_line.clear();
3938 current_line_start = self.current_position();
3939 }
3940 Some(ch) => {
3941 current_line.push(ch);
3942 self.advance();
3943 }
3944 None => {
3945 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
3947 content_end = current_line_start;
3948 break;
3949 }
3950 if !current_line.is_empty() {
3951 content.push_str(¤t_line);
3952 }
3953 content_end = self.current_position();
3954 break;
3955 }
3956 }
3957 }
3958
3959 let post_heredoc_offset = self.offset;
3964 self.offset = rest_of_line_start.offset;
3965 for ch in rest_of_line.chars() {
3966 self.reinject_buf.push_back(ch);
3967 }
3968 self.reinject_buf.push_back('\n');
3969 self.reinject_resume_offset = Some(post_heredoc_offset);
3970
3971 HeredocRead {
3972 content,
3973 content_span: Span::from_positions(content_start, content_end),
3974 }
3975 }
3976
3977 fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
3978 let mut chars = self.cursor.rest().chars();
3979 if chars.next() != Some('\n') {
3980 return false;
3981 }
3982
3983 for ch in chars {
3984 if matches!(ch, ' ' | '\t') {
3985 continue;
3986 }
3987 if ch == '\n' {
3988 return false;
3989 }
3990 return matches!(ch, '|' | '&' | ';' | '<' | '>')
3991 || (ch == '#' && self.comments_enabled());
3992 }
3993
3994 false
3995 }
3996}
3997
/// Reports whether `line` terminates a heredoc that uses `delimiter`.
///
/// With `strip_tabs`, leading tabs on `line` are ignored first. The line
/// matches when it starts with `delimiter` and anything after it consists
/// solely of spaces and tabs.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = match strip_tabs {
        true => line.trim_start_matches('\t'),
        false => line,
    };

    candidate
        .strip_prefix(delimiter)
        .is_some_and(|after| after.chars().all(|ch| matches!(ch, ' ' | '\t')))
}
4015
/// Decides whether a `#` in a heredoc tail begins a comment, based on the
/// character that precedes it.
///
/// A `#` with no preceding character, or one preceded by whitespace or by
/// one of `;`, `|`, `&`, `<`, `>`, `)`, starts a comment.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(';' | '|' | '&' | '<' | '>' | ')') => true,
        Some(other) => other.is_whitespace(),
    }
}
4021
/// Decodes the character that starts at byte `index` of `input`.
///
/// Returns the character together with the byte index just past it, or
/// `None` when `index` is out of range, not on a character boundary, or at
/// the end of `input`.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|tail| tail.chars().next())
        .map(|decoded| (decoded, index + decoded.len_utf8()))
}
4026
4027fn line_has_unclosed_double_paren(prefix: &str) -> bool {
4028 let mut index = 0usize;
4029 let mut depth = 0usize;
4030 let mut in_single = false;
4031 let mut in_double = false;
4032 let mut in_backtick = false;
4033 let mut escaped = false;
4034
4035 while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
4036 let was_escaped = escaped;
4037 if ch == '\\' && !in_single {
4038 escaped = !escaped;
4039 index = next_index;
4040 continue;
4041 }
4042 escaped = false;
4043
4044 match ch {
4045 '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
4046 '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
4047 '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
4048 '(' if !in_single
4049 && !in_double
4050 && !in_backtick
4051 && !was_escaped
4052 && prefix[next_index..].starts_with('(') =>
4053 {
4054 depth += 1;
4055 index = next_index + '('.len_utf8();
4056 continue;
4057 }
4058 ')' if !in_single
4059 && !in_double
4060 && !in_backtick
4061 && !was_escaped
4062 && prefix[next_index..].starts_with(')') =>
4063 {
4064 depth = depth.saturating_sub(1);
4065 index = next_index + ')'.len_utf8();
4066 continue;
4067 }
4068 _ => {}
4069 }
4070
4071 index = next_index;
4072 }
4073
4074 depth > 0
4075}
4076
4077fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4078 let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4079 let prefix = &input[line_start..index];
4080 line_has_unclosed_double_paren(prefix)
4081}
4082
4083fn hash_starts_comment(input: &str, index: usize) -> bool {
4084 if inside_unclosed_double_paren_on_line(input, index) {
4085 return false;
4086 }
4087
4088 let next = &input[index + '#'.len_utf8()..];
4089 input[..index]
4090 .chars()
4091 .next_back()
4092 .is_none_or(|prev| match prev {
4093 '(' => {
4094 let whitespace_index = next.find(char::is_whitespace);
4095 let close_index = next.find(')');
4096
4097 match (whitespace_index, close_index) {
4098 (Some(whitespace), Some(close)) => whitespace < close,
4099 (Some(_), None) | (None, None) => true,
4100 (None, Some(_)) => false,
4101 }
4102 }
4103 _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4104 })
4105}
4106
/// Reports whether `ch` ends a heredoc delimiter word.
///
/// Nothing terminates the word while a quote is open or while the character
/// is escaped. Otherwise whitespace and the characters `|`, `&`, `;`, `<`,
/// `>`, `(`, `)` do.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }

    matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')') || ch.is_whitespace()
}
4118
4119fn scan_double_quoted_command_substitution_segment(
4120 input: &str,
4121 mut index: usize,
4122 subst_depth: usize,
4123) -> Option<usize> {
4124 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4125 match ch {
4126 '"' => return Some(next_index),
4127 '\\' => {
4128 index = next_index;
4129 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4130 index = escaped_next;
4131 }
4132 }
4133 '$' if input[next_index..].starts_with('{') => {
4134 let consumed = scan_command_subst_parameter_expansion_len(
4135 &input[next_index + '{'.len_utf8()..],
4136 subst_depth,
4137 )?;
4138 index = next_index + '{'.len_utf8() + consumed;
4139 }
4140 '$' if input[next_index..].starts_with('(')
4141 && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4142 {
4143 let consumed = scan_command_substitution_body_len_inner(
4144 &input[next_index + '('.len_utf8()..],
4145 subst_depth + 1,
4146 )?;
4147 index = next_index + '('.len_utf8() + consumed;
4148 }
4149 _ => index = next_index,
4150 }
4151 }
4152
4153 None
4154}
4155
/// Scans the inside of a `${…}` parameter expansion and returns the byte
/// length up to and including the closing `}`.
///
/// `input` starts just after the opening `{`. Quotes (`'…'`, `$'…'`, `"…"`),
/// backticks and backslash escapes hide a `}` from the scan. Nested `${…}`,
/// `$(…)`, `<(…)` and `>(…)` are skipped via their own scanners when those
/// succeed. Returns `None` when the input ends before an unquoted `}`.
///
/// `subst_depth` is passed through to nested scans and incremented for each
/// nested command or process substitution.
fn scan_command_subst_parameter_expansion_len(input: &str, subst_depth: usize) -> Option<usize> {
    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // Set right after an unquoted `$`, so a following `'` opens `$'…'`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            // Consecutive backslashes cancel each other out.
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // `$` introduces a nested expansion everywhere except inside single
        // quotes, `$'…'` and backticks. If the nested scan fails, the `$` is
        // handled as an ordinary character below.
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // `$(` but not `$((`.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(` / `>(` outside every kind of quoting is scanned like a
        // command substitution body.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // The first `}` outside all quoting closes the expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4255
4256fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4257 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4258 if !matches!(ch, ' ' | '\t') {
4259 break;
4260 }
4261 index = next_index;
4262 }
4263
4264 let start = index;
4265 let mut cooked = String::new();
4266 let mut in_single = false;
4267 let mut in_double = false;
4268 let mut escaped = false;
4269
4270 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4271 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4272 break;
4273 }
4274
4275 index = next_index;
4276 if escaped {
4277 cooked.push(ch);
4278 escaped = false;
4279 continue;
4280 }
4281
4282 match ch {
4283 '\\' if !in_single => escaped = true,
4284 '\'' if !in_double => in_single = !in_single,
4285 '"' if !in_single => in_double = !in_double,
4286 _ => cooked.push(ch),
4287 }
4288 }
4289
4290 (index > start).then_some((index, cooked))
4291}
4292
4293fn skip_command_subst_pending_heredoc(
4294 input: &str,
4295 mut index: usize,
4296 delimiter: &str,
4297 strip_tabs: bool,
4298) -> usize {
4299 while index <= input.len() {
4300 let rest = &input[index..];
4301 let line_len = rest.find('\n').unwrap_or(rest.len());
4302 let line = &rest[..line_len];
4303 let has_newline = line_len < rest.len();
4304
4305 index += line_len;
4306 if has_newline {
4307 index += '\n'.len_utf8();
4308 }
4309
4310 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4311 return index;
4312 }
4313 }
4314
4315 index
4316}
4317
4318fn scan_command_subst_ansi_c_single_quoted_segment(
4319 input: &str,
4320 quote_index: usize,
4321) -> Option<usize> {
4322 let mut index = quote_index + '\''.len_utf8();
4323
4324 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4325 index = next_index;
4326 if ch == '\\' {
4327 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4328 index = escaped_next;
4329 }
4330 continue;
4331 }
4332
4333 if ch == '\'' {
4334 return Some(index);
4335 }
4336 }
4337
4338 None
4339}
4340
4341fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4342 let mut index = start;
4343
4344 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4345 index = next_index;
4346 if ch == '\\' {
4347 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4348 index = escaped_next;
4349 }
4350 continue;
4351 }
4352
4353 if ch == '`' {
4354 return Some(index);
4355 }
4356 }
4357
4358 None
4359}
4360
4361fn flush_scanned_command_subst_keyword(
4362 current_word: &mut String,
4363 pending_case_headers: &mut usize,
4364 case_clause_depths: &mut SmallVec<[usize; 4]>,
4365 depth: usize,
4366 word_started_at_command_start: &mut bool,
4367) {
4368 if current_word.is_empty() {
4369 *word_started_at_command_start = false;
4370 return;
4371 }
4372
4373 match current_word.as_str() {
4374 "case" if *word_started_at_command_start => *pending_case_headers += 1,
4375 "in" if *pending_case_headers > 0 => {
4376 *pending_case_headers -= 1;
4377 case_clause_depths.push(depth);
4378 }
4379 "esac" if *word_started_at_command_start => {
4380 case_clause_depths.pop();
4381 }
4382 _ => {}
4383 }
4384
4385 current_word.clear();
4386 *word_started_at_command_start = false;
4387}
4388
/// Scans the body of a command substitution and returns the byte length up
/// to and including the `)` that closes it.
///
/// `input` starts just after the opening `(`. The scan tracks nested
/// parentheses, quoting, comments, pending heredocs and `case … in … esac`
/// clauses, so that a `)` inside any of those does not end the body.
///
/// Returns `None` when `subst_depth` has reached `DEFAULT_MAX_SUBST_DEPTH`,
/// when a nested quoted segment or expansion is unterminated, or when the
/// input ends before the closing `)`.
fn scan_command_substitution_body_len_inner(input: &str, subst_depth: usize) -> Option<usize> {
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    // Parenthesis nesting; starts at 1 for the already-consumed opening `(`.
    let mut depth = 1;
    // Heredocs announced on the current line: (delimiter, strip_tabs).
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    // `case` keywords seen whose matching `in` has not been seen yet.
    let mut pending_case_headers = 0usize;
    // Parenthesis depth at which each open `case … in` clause was entered.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Comment: skip to the end of the line, then skip any heredoc
            // bodies that were announced on that line.
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // A `)` at the depth where a `case … in` clause is open does
                // not close a group, so the depth is left unchanged.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                if depth == 0 {
                    // This `)` closes the substitution itself.
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                // A quoted word either satisfies a pending redirection target
                // or counts as a regular word of the command.
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                // No escapes inside single quotes: run to the next `'` (or to
                // the end of input when the quote is unterminated).
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // `$'…'` quoting.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Backslash escape: skip the escaped character as well.
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '>' => {
                // An all-digit word that began at command start and directly
                // precedes `>` is treated as a file-descriptor prefix, so the
                // command has not actually started yet.
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            // `<<` family: heredoc, `<<-` heredoc, `<<<`, or a shift operator.
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // Inside an unclosed `((` on this line, `<<` is not a heredoc
                // operator; skip both characters.
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<`: consume all three characters and expect a target word.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                // `<<-` marks a heredoc whose delimiter line may be tab-indented.
                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    // The body is skipped once the current line ends.
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                // Heredoc bodies announced on this line start right here.
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            // `${…}` parameter expansion.
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(…)`; `$((` is excluded here and handled character by
            // character through the other arms.
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            _ => {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    // Accumulate a candidate keyword; remember whether it
                    // began in command position.
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        // Blanks separate words without changing position state.
                        ' ' | '\t' => {}
                        // Command separators put the scan back at command start.
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    None
}
4742
/// Scans a command substitution body starting from nesting depth zero.
///
/// Delegates to `scan_command_substitution_body_len_inner`, which returns
/// the byte length of `input` up to and including the closing `)`, or
/// `None` when no closing `)` is found.
pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
    scan_command_substitution_body_len_inner(input, 0)
}
4746
4747#[cfg(test)]
4748mod tests {
4749 use super::*;
4750
4751 fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4752 match token.kind {
4753 kind if kind.is_word_like() => token.word_string(),
4754 TokenKind::Comment => token
4755 .span
4756 .slice(source)
4757 .strip_prefix('#')
4758 .map(str::to_string),
4759 TokenKind::Error => token
4760 .error_kind()
4761 .map(LexerErrorKind::message)
4762 .map(str::to_string),
4763 _ => None,
4764 }
4765 }
4766
4767 fn assert_next_token(
4768 lexer: &mut Lexer<'_>,
4769 expected_kind: TokenKind,
4770 expected_text: Option<&str>,
4771 ) {
4772 let token = lexer.next_lexed_token().unwrap();
4773 assert_eq!(token.kind, expected_kind);
4774 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4775 }
4776
4777 fn assert_next_token_with_comments(
4778 lexer: &mut Lexer<'_>,
4779 expected_kind: TokenKind,
4780 expected_text: Option<&str>,
4781 ) {
4782 let token = lexer.next_lexed_token_with_comments().unwrap();
4783 assert_eq!(token.kind, expected_kind);
4784 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4785 }
4786
4787 fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4788 let mut lexer = Lexer::new(input);
4789
4790 while let Some(token) = lexer.next_lexed_token() {
4791 if token.kind == TokenKind::Newline {
4792 continue;
4793 }
4794
4795 assert_eq!(
4796 token.span.start.line, token.span.end.line,
4797 "token should stay on one line: {:?}",
4798 token
4799 );
4800 }
4801 }
4802
/// Three blank-separated plain words lex as three `Word` tokens.
#[test]
fn test_simple_words() {
    let mut lexer = Lexer::new("echo hello world");

    for expected in ["echo", "hello", "world"] {
        assert_next_token(&mut lexer, TokenKind::Word, Some(expected));
    }
    assert!(lexer.next_lexed_token().is_none());
}
4812
/// A single-quoted string lexes as one `LiteralWord` without its quotes.
#[test]
fn test_single_quoted_string() {
    let mut lexer = Lexer::new("echo 'hello world'");

    let expected_tokens = [
        (TokenKind::Word, "echo"),
        (TokenKind::LiteralWord, "hello world"),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, Some(text));
    }
    assert!(lexer.next_lexed_token().is_none());
}
4822
/// A double-quoted string lexes as one `QuotedWord` without its quotes.
#[test]
fn test_double_quoted_string() {
    let mut lexer = Lexer::new("echo \"hello world\"");

    let expected_tokens = [
        (TokenKind::Word, "echo"),
        (TokenKind::QuotedWord, "hello world"),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, Some(text));
    }
    assert!(lexer.next_lexed_token().is_none());
}
4831
/// The single segment of a double-quoted expansion carries a span that
/// slices the original source.
#[test]
fn test_double_quoted_expansion_token_keeps_source_backing() {
    let source = r#""$bar""#;
    let mut lexer = Lexer::new(source);

    let quoted = lexer.next_lexed_token().unwrap();
    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some("$bar"));

    let only_segment = quoted.word().unwrap().single_segment().unwrap();
    assert_eq!(only_segment.kind(), LexedWordSegmentKind::DoubleQuoted);

    let segment_span = only_segment.span().unwrap();
    assert_eq!(segment_span.slice(source), "$bar");
}
4846
/// Inner quotes and a pipeline inside `$(…)` stay part of the enclosing
/// double-quoted token.
#[test]
fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
    let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
    let expected_text = r#"$(echo "$line" | cut -d' ' -f2-)"#;

    let quoted = Lexer::new(source).next_lexed_token().unwrap();

    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(expected_text));
}
4859
/// A quoted `${@}` and a pipeline inside `$(…)` stay part of the enclosing
/// double-quoted token.
#[test]
fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
    let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
    let expected_text = r#"$(echo "${@}" | tr -d '[:space:]')"#;

    let quoted = Lexer::new(source).next_lexed_token().unwrap();

    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(expected_text));
}
4872
/// A word made of plain, double-quoted and single-quoted parts keeps one
/// segment per part, and the plain segment's span slices the source.
#[test]
fn test_mixed_word_keeps_segment_kinds() {
    let source = r#"foo"bar"'baz'"#;
    let mut lexer = Lexer::new(source);

    let token = lexer.next_lexed_token().unwrap();
    assert_eq!(token.kind, TokenKind::Word);

    let word = token.word().unwrap();

    let expected_segments = vec![
        (LexedWordSegmentKind::Plain, "foo".to_string()),
        (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
        (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
    ];
    let actual_segments: Vec<_> = word
        .segments()
        .map(|segment| (segment.kind(), segment.as_str().to_string()))
        .collect();
    assert_eq!(actual_segments, expected_segments);

    assert_eq!(word.joined_text(), "foobarbaz");

    let first_span = word
        .segments()
        .next()
        .and_then(LexedWordSegment::span)
        .unwrap();
    assert_eq!(first_span.slice(source), "foo");
}
4905
/// A `<<-` heredoc with a tab-indented delimiter line is skipped, so the
/// scan ends at the `)` after it.
#[test]
fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
    let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";

    let scanned_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..scanned_len];

    assert!(scanned.contains("field, direction"));
    assert!(scanned.ends_with(')'));
}
4916
/// A `#` directly after `;` starts a comment, so the `)` inside it does not
/// end the body.
#[test]
fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
    let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";

    let scanned_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..scanned_len];

    assert!(scanned.contains("printf '%s' y"));
    assert!(scanned.ends_with(')'));
}
4927
/// A `# …` comment directly after a grouping `(` hides the `)` inside it.
#[test]
fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
    let source = " (# comment with )\nprintf %s 1,2\n) )\"";

    let scanned_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..scanned_len];

    assert!(scanned.contains("printf %s 1,2"));
    assert!(scanned.ends_with(')'));
}
4938
/// A heredoc delimiter immediately followed by `|` is still recognised and
/// its body skipped.
#[test]
fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
    let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";

    let scanned_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..scanned_len];

    assert!(scanned.contains("field, direction"));
    assert!(scanned.ends_with(')'));
}
4949
/// A `)` inside a `${…}` expansion does not end the body.
#[test]
fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
    let source = "printf %s ${x//foo/)},1)\"";

    let scanned_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..scanned_len];

    assert!(scanned.contains("${x//foo/)},1"));
    assert!(scanned.ends_with(')'));
}
4960
/// A comment directly after a `case` pattern's `)` hides the `esac` and `)`
/// written inside it.
#[test]
fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
    let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";

    let scanned_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..scanned_len];

    assert!(scanned.contains("printf %s 1,2"));
    assert!(scanned.ends_with(')'));
}
4971
4972 #[test]
4973 fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
4974 let source = "[[ \"$buf\" == (#b)(*) ]]";
4975 let index = source.find('#').expect("expected hash");
4976
4977 assert!(!hash_starts_comment(source, index));
4978 }
4979
4980 #[test]
4981 fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
4982 let source = "(#comment with )";
4983 let index = source.find('#').expect("expected hash");
4984
4985 assert!(hash_starts_comment(source, index));
4986 }
4987
4988 #[test]
4989 fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
4990 let source = "(( #c < 256 ))";
4991 let index = source.find('#').expect("expected hash");
4992
4993 assert!(!hash_starts_comment(source, index));
4994 }
4995
4996 #[test]
4997 fn test_hash_starts_comment_respects_quoted_double_parens() {
4998 let source = "printf '((' # comment";
4999 let index = source.find('#').expect("expected hash");
5000
5001 assert!(hash_starts_comment(source, index));
5002 }
5003
5004 #[test]
5005 fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
5006 let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";
5007
5008 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5009 let body = &source[..consumed];
5010
5011 assert!(body.contains("printf %s 1,2"));
5012 assert!(body.ends_with(')'));
5013 }
5014
5015 #[test]
5016 fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
5017 let source = " (#comment with )\nprintf %s 1,2\n) )\"";
5018
5019 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5020 let body = &source[..consumed];
5021
5022 assert!(body.contains("printf %s 1,2"));
5023 assert!(body.ends_with(')'));
5024 }
5025
5026 #[test]
5027 fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
5028 let source = "((x<<2))\nprintf %s 1,2\n)\"";
5029
5030 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5031 let body = &source[..consumed];
5032
5033 assert!(body.contains("printf %s 1,2"));
5034 assert!(body.ends_with(')'));
5035 }
5036
5037 #[test]
5038 fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
5039 let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";
5040
5041 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5042 let body = &source[..consumed];
5043
5044 assert!(body.contains("printf %s 1,2"));
5045 assert!(body.ends_with("))"));
5046 }
5047
5048 #[test]
5049 fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
5050 let source = "printf %s 1,2; echo case in)\"";
5051
5052 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5053 let body = &source[..consumed];
5054
5055 assert!(body.contains("echo case in"));
5056 assert!(body.ends_with(')'));
5057 }
5058
5059 #[test]
5060 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
5061 let source = "printf %s $'a\\'b'; printf %s 1,2)\"";
5062
5063 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5064 let body = &source[..consumed];
5065
5066 assert!(body.contains("$'a\\'b'"));
5067 assert!(body.contains("printf %s 1,2"));
5068 assert!(body.ends_with(')'));
5069 }
5070
5071 #[test]
5072 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
5073 let source = "printf %s `echo foo)`; printf %s ok)\"";
5074
5075 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5076 let body = &source[..consumed];
5077
5078 assert!(body.contains("`echo foo)`"));
5079 assert!(body.contains("printf %s ok"));
5080 assert!(body.ends_with(')'));
5081 }
5082
5083 #[test]
5084 fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
5085 let source = "printf %s ${x/`echo }`/foo)},1)\"";
5086
5087 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5088 let body = &source[..consumed];
5089
5090 assert!(body.contains("${x/`echo }`/foo)},1"));
5091 assert!(body.ends_with(')'));
5092 }
5093
5094 #[test]
5095 fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5096 {
5097 let source = "printf %s ${x/<(echo })/foo)},1)\"";
5098
5099 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5100 let body = &source[..consumed];
5101
5102 assert!(body.contains("${x/<(echo })/foo)},1"));
5103 assert!(body.ends_with(')'));
5104 }
5105
5106 #[test]
5107 fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5108 let source = "printf %s 1,2; echo case in)";
5109
5110 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5111 let body = &source[..consumed];
5112
5113 assert_eq!(body, source);
5114 }
5115
5116 #[test]
5117 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5118 let source = "printf %s $'a\\'b'; printf %s 1,2)";
5119
5120 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5121 let body = &source[..consumed];
5122
5123 assert_eq!(body, source);
5124 }
5125
5126 #[test]
5127 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5128 let source = "printf %s `echo foo)`; printf %s ok)";
5129
5130 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5131 let body = &source[..consumed];
5132
5133 assert_eq!(body, source);
5134 }
5135
5136 #[test]
5137 fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5138 let source = "echo \"$line\" | cut -d' ' -f2-)";
5139
5140 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5141 let body = &source[..consumed];
5142
5143 assert_eq!(body, source);
5144 }
5145
5146 #[test]
5147 fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5148 let source = "echo \"${@}\" | tr -d '[:space:]')";
5149
5150 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5151 let body = &source[..consumed];
5152
5153 assert_eq!(body, source);
5154 }
5155
5156 #[test]
5157 fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5158 let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5159
5160 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5161 let body = &source[..consumed];
5162
5163 assert_eq!(body, source);
5164 }
5165
5166 #[test]
5167 fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5168 let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5169
5170 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5171 let body = &source[..consumed];
5172
5173 assert_eq!(body, source);
5174 }
5175
5176 #[test]
5177 fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
5178 let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
5179 let mut lexer = Lexer::new(source);
5180
5181 let first = lexer.next_lexed_token().expect("expected first token");
5182 assert!(first.kind.is_word_like(), "{:?}", first.kind);
5183 assert_eq!(first.word_string().as_deref(), Some("echo"));
5184
5185 let second = lexer.next_lexed_token().expect("expected second token");
5186 assert!(second.kind.is_word_like(), "{:?}", second.kind);
5187 assert_eq!(
5188 second.word_string().as_deref(),
5189 Some("$(echo \"$(( $(printf ')') + 1 ))\")")
5190 );
5191 }
5192
5193 #[test]
5194 fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
5195 let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
5196 let start = source.find("$(").expect("expected command substitution") + 2;
5197 let consumed =
5198 scan_command_substitution_body_len(&source[start..]).expect("expected match");
5199 assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
5200 }
5201
5202 #[test]
5203 fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
5204 let source = "echo $(echo $(basename $filename .fuzz))";
5205 let start = source.find("$(").expect("expected command substitution") + 2;
5206 let consumed =
5207 scan_command_substitution_body_len(&source[start..]).expect("expected match");
5208 assert_eq!(
5209 &source[start..start + consumed],
5210 "echo $(basename $filename .fuzz))"
5211 );
5212 }
5213
5214 #[test]
5215 fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
5216 let source = "\n [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n )";
5217 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5218 assert_eq!(consumed, source.len());
5219 }
5220
5221 #[test]
5222 fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
5223 let source = "'foo'bar";
5224 let mut lexer = Lexer::new(source);
5225
5226 let token = lexer.next_lexed_token().unwrap();
5227 assert_eq!(token.kind, TokenKind::LiteralWord);
5228
5229 let word = token.word().unwrap();
5230 let segments: Vec<_> = word
5231 .segments()
5232 .map(|segment| (segment.kind(), segment.as_str().to_string()))
5233 .collect();
5234
5235 assert_eq!(
5236 segments,
5237 vec![
5238 (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
5239 (LexedWordSegmentKind::Plain, "bar".to_string()),
5240 ]
5241 );
5242 assert_eq!(word.joined_text(), "foobar");
5243 assert_eq!(
5244 word.segments()
5245 .nth(1)
5246 .and_then(LexedWordSegment::span)
5247 .unwrap()
5248 .slice(source),
5249 "bar"
5250 );
5251 }
5252
5253 #[test]
5254 fn test_unquoted_command_substitution_word_keeps_source_backing() {
5255 let source = "$(printf hi)";
5256 let mut lexer = Lexer::new(source);
5257
5258 let token = lexer.next_lexed_token().unwrap();
5259 assert_eq!(token.kind, TokenKind::Word);
5260
5261 let word = token.word().unwrap();
5262 let segment = word.single_segment().unwrap();
5263 assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5264 assert_eq!(segment.as_str(), source);
5265 assert_eq!(segment.span().unwrap().slice(source), source);
5266 }
5267
5268 #[test]
5269 fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
5270 let source = "${arr[$RANDOM % ${#arr[@]}]}";
5271 let mut lexer = Lexer::new(source);
5272
5273 let token = lexer.next_lexed_token().unwrap();
5274 assert_eq!(token.kind, TokenKind::Word);
5275
5276 let word = token.word().unwrap();
5277 let segment = word.single_segment().unwrap();
5278 assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5279 assert_eq!(segment.as_str(), source);
5280 assert_eq!(segment.span().unwrap().slice(source), source);
5281 }
5282
5283 #[test]
5284 fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
5285 let source = "\"foo\"$(printf hi)";
5286 let mut lexer = Lexer::new(source);
5287
5288 let token = lexer.next_lexed_token().unwrap();
5289 assert_eq!(token.kind, TokenKind::Word);
5290
5291 let word = token.word().unwrap();
5292 let continuation = word.segments().nth(1).unwrap();
5293 assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
5294 assert_eq!(continuation.as_str(), "$(printf hi)");
5295 assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
5296 }
5297
5298 #[test]
5299 fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
5300 let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
5301 let mut lexer = Lexer::new(source);
5302
5303 let token = lexer.next_lexed_token().unwrap();
5304 assert_eq!(token.kind, TokenKind::QuotedWord);
5305
5306 let word = token.word().unwrap();
5307 let segment = word.single_segment().unwrap();
5308 assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5309 assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
5310 assert_eq!(
5311 segment.span().unwrap().slice(source),
5312 "${arr[$RANDOM % ${#arr[@]}]}"
5313 );
5314 }
5315
5316 #[test]
5317 fn test_ansi_c_control_escape_can_consume_quote() {
5318 let mut lexer = Lexer::new("echo $'\\c''");
5319
5320 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5321 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
5322 assert!(lexer.next_lexed_token().is_none());
5323 }
5324
5325 #[test]
5326 fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
5327 let source = r#"out_line="${out_line//'"'/'\"'}"
5328"#;
5329 let mut lexer = Lexer::new(source);
5330
5331 assert_next_token(
5332 &mut lexer,
5333 TokenKind::Word,
5334 Some(r#"out_line=${out_line//'"'/'"'}"#),
5335 );
5336 assert_next_token(&mut lexer, TokenKind::Newline, None);
5337 assert!(lexer.next_lexed_token().is_none());
5338 }
5339
5340 #[test]
5341 fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
5342 let source = r#"out_line="${out_line//'"'/'\"'}"
5343echo "Error: Missing python3!"
5344cat << 'EOF' > "${pywrapper}"
5345import os
5346EOF
5347"#;
5348 let mut lexer = Lexer::new(source);
5349
5350 assert_next_token(
5351 &mut lexer,
5352 TokenKind::Word,
5353 Some(r#"out_line=${out_line//'"'/'"'}"#),
5354 );
5355 assert_next_token(&mut lexer, TokenKind::Newline, None);
5356 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5357 assert_next_token(
5358 &mut lexer,
5359 TokenKind::QuotedWord,
5360 Some("Error: Missing python3!"),
5361 );
5362 assert_next_token(&mut lexer, TokenKind::Newline, None);
5363 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5364 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5365 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
5366 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5367 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
5368 }
5369
5370 #[test]
5371 fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
5372 let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
5373 let mut lexer = Lexer::new(source);
5374
5375 let token = lexer.next_lexed_token().unwrap();
5376 assert_eq!(token.kind, TokenKind::Word);
5377 assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
5378 assert!(token.source_slice(source).is_none());
5379 assert_eq!(
5380 token.word_string().as_deref(),
5381 Some("crypt=${crypt//\\/\\\\}")
5382 );
5383 assert_next_token(&mut lexer, TokenKind::Newline, None);
5384 assert!(lexer.next_lexed_token().is_none());
5385 }
5386
5387 #[test]
5388 fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
5389 let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n if true; then\n txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n fi\n}\n";
5390 let mut lexer = Lexer::new(source);
5391
5392 assert_next_token(
5393 &mut lexer,
5394 TokenKind::Word,
5395 Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
5396 );
5397 assert_next_token(&mut lexer, TokenKind::Newline, None);
5398 assert_next_token(&mut lexer, TokenKind::Newline, None);
5399 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5400 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5401 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5402 assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5403 assert_next_token(&mut lexer, TokenKind::Newline, None);
5404 assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5405 assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
5406 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5407 assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5408 assert_next_token(&mut lexer, TokenKind::Newline, None);
5409 assert_next_token(
5410 &mut lexer,
5411 TokenKind::Word,
5412 Some(
5413 "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
5414 ),
5415 );
5416 assert_next_token(&mut lexer, TokenKind::Newline, None);
5417 assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
5418 assert_next_token(&mut lexer, TokenKind::Newline, None);
5419 assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5420 assert_next_token(&mut lexer, TokenKind::Newline, None);
5421 assert!(lexer.next_lexed_token().is_none());
5422 }
5423
5424 #[test]
5425 fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5426 let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5427 let mut lexer = Lexer::new(source);
5428
5429 assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5430 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5431 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5432 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5433 assert_next_token(&mut lexer, TokenKind::And, None);
5434 assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5435 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5436 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5437 assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5438 assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5439 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5440 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5441 assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5442 assert_next_token(&mut lexer, TokenKind::Newline, None);
5443 assert!(lexer.next_lexed_token().is_none());
5444 }
5445
5446 #[test]
5447 fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5448 let source = "echo -{$(echo a),b}-\n";
5449 let mut lexer = Lexer::new(source);
5450
5451 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5452 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5453 assert_next_token(&mut lexer, TokenKind::Newline, None);
5454 assert!(lexer.next_lexed_token().is_none());
5455 }
5456
5457 #[test]
5458 fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5459 let source = "echo -{$((1 + 2)),b}-\n";
5460 let mut lexer = Lexer::new(source);
5461
5462 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5463 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5464 assert_next_token(&mut lexer, TokenKind::Newline, None);
5465 assert!(lexer.next_lexed_token().is_none());
5466 }
5467
5468 #[test]
5469 fn test_operators() {
5470 let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5471
5472 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5473 assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5474 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5475 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5476 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5477 assert_next_token(&mut lexer, TokenKind::And, None);
5478 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5479 assert_next_token(&mut lexer, TokenKind::Or, None);
5480 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5481 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5482 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5483 assert_next_token(&mut lexer, TokenKind::Background, None);
5484 assert!(lexer.next_lexed_token().is_none());
5485 }
5486
5487 #[test]
5488 fn test_double_left_bracket_requires_separator() {
5489 let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5490
5491 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5492 assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5493 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5494 assert_next_token(&mut lexer, TokenKind::Newline, None);
5495 assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5496 assert_next_token(&mut lexer, TokenKind::Newline, None);
5497 assert!(lexer.next_lexed_token().is_none());
5498 }
5499
5500 #[test]
5501 fn test_redirects() {
5502 let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");
5503
5504 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5505 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5506 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5507 assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5508 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5509 assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5510 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5511 assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
5512 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5513 let token = lexer.next_lexed_token().unwrap();
5514 assert_eq!(token.kind, TokenKind::Clobber);
5515 assert_eq!(token.fd_value(), Some(2));
5516 assert_eq!(token_text(&token, lexer.input), None);
5517 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5518 assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
5519 assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
5520 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5521 assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
5522 assert_next_token(&mut lexer, TokenKind::HereString, None);
5523 assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
5524 assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
5525 assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
5526 assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
5527 assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
5528 }
5529
5530 #[test]
5531 fn test_comment() {
5532 let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5533
5534 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5535 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5536 assert_next_token(&mut lexer, TokenKind::Newline, None);
5537 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5538 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5539 }
5540
5541 #[test]
5542 fn test_comment_token_with_span() {
5543 let mut lexer = Lexer::new("# lead\necho hi # tail");
5544
5545 let comment = lexer.next_lexed_token_with_comments().unwrap();
5546 assert_eq!(comment.kind, TokenKind::Comment);
5547 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5548 assert_eq!(comment.span.start.line, 1);
5549 assert_eq!(comment.span.start.column, 1);
5550 assert_eq!(comment.span.end.line, 1);
5551 assert_eq!(comment.span.end.column, 7);
5552
5553 assert_next_token(&mut lexer, TokenKind::Newline, None);
5554 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5555 assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5556
5557 let inline = lexer.next_lexed_token_with_comments().unwrap();
5558 assert_eq!(inline.kind, TokenKind::Comment);
5559 assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5560 assert_eq!(inline.span.start.line, 2);
5561 assert_eq!(inline.span.start.column, 9);
5562 }
5563
5564 #[test]
5565 fn test_comment_token_preserves_hash_boundaries() {
5566 let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5567
5568 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5569 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5570 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5571 assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5572 assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5573 assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5574 assert!(lexer.next_lexed_token_with_comments().is_none());
5575 }
5576
5577 #[test]
5578 fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5579 let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5580
5581 let mut saw_comment = false;
5582 while let Some(token) = lexer.next_lexed_token_with_comments() {
5583 if token.kind == TokenKind::Comment {
5584 saw_comment = true;
5585 break;
5586 }
5587 }
5588
5589 assert!(
5590 !saw_comment,
5591 "zsh inline glob controls inside [[ ]] should not lex as comments"
5592 );
5593 }
5594
5595 #[test]
5596 fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5597 let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5598
5599 let mut saw_comment = false;
5600 while let Some(token) = lexer.next_lexed_token_with_comments() {
5601 if token.kind == TokenKind::Comment {
5602 saw_comment = true;
5603 break;
5604 }
5605 }
5606
5607 assert!(
5608 !saw_comment,
5609 "zsh arithmetic char literals inside (( )) should not lex as comments"
5610 );
5611 }
5612
5613 #[test]
5614 fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
5615 let mut lexer = Lexer::new(
5616 "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
5617 );
5618
5619 assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5620 assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5621 assert_next_token(
5622 &mut lexer,
5623 TokenKind::LiteralWord,
5624 Some("\\e]133;C;cmdline_url=%s\\a"),
5625 );
5626 assert_next_token(
5627 &mut lexer,
5628 TokenKind::QuotedWord,
5629 Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5630 );
5631 assert_next_token(&mut lexer, TokenKind::Newline, None);
5632 }
5633
5634 #[test]
5635 fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
5636 let mut lexer = Lexer::new(
5637 "() {\n builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
5638 );
5639
5640 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5641 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5642 assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5643 assert_next_token(&mut lexer, TokenKind::Newline, None);
5644 assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5645 assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5646 assert_next_token(
5647 &mut lexer,
5648 TokenKind::LiteralWord,
5649 Some("\\e]133;C;cmdline_url=%s\\a"),
5650 );
5651 assert_next_token(
5652 &mut lexer,
5653 TokenKind::QuotedWord,
5654 Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5655 );
5656 assert_next_token(&mut lexer, TokenKind::Newline, None);
5657 assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5658 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
5659 assert_next_token(&mut lexer, TokenKind::Newline, None);
5660 }
5661
5662 #[test]
5663 fn test_variable_words() {
5664 let mut lexer = Lexer::new("echo $HOME $USER");
5665
5666 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5667 assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5668 assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5669 assert!(lexer.next_lexed_token().is_none());
5670 }
5671
5672 #[test]
5673 fn test_pipeline_tokens() {
5674 let mut lexer = Lexer::new("echo hello | cat");
5675
5676 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5677 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5678 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5679 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5680 assert!(lexer.next_lexed_token().is_none());
5681 }
5682
5683 #[test]
5684 fn test_read_heredoc() {
5685 let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5687 let content = lexer.read_heredoc("EOF", false);
5688 assert_eq!(content.content, "hello\nworld\n");
5689 }
5690
5691 #[test]
5692 fn test_read_heredoc_single_line() {
5693 let mut lexer = Lexer::new("\ntest\nEOF");
5694 let content = lexer.read_heredoc("EOF", false);
5695 assert_eq!(content.content, "test\n");
5696 }
5697
5698 #[test]
5699 fn test_read_heredoc_full_scenario() {
5700 let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5702
5703 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5705 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5706 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5707
5708 let content = lexer.read_heredoc("EOF", false);
5710 assert_eq!(content.content, "hello\nworld\n");
5711 }
5712
5713 #[test]
5714 fn test_read_heredoc_with_redirect() {
5715 let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5717 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5718 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5719 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5720 let content = lexer.read_heredoc("EOF", false);
5721 assert_eq!(content.content, "hello\n");
5722 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5724 assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5725 }
5726
5727 #[test]
5728 fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5729 let source = "cat <<EOF | grep hello \\\n | sort \\\n > out.txt\nhello\nEOF\n";
5730 let mut lexer = Lexer::new(source);
5731
5732 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5733 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5734 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5735
5736 let heredoc = lexer.read_heredoc("EOF", false);
5737 assert_eq!(heredoc.content, "hello\n");
5738
5739 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5740 assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
5741 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5742 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5743 assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
5744 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5745 assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
5746 }
5747
5748 #[test]
5749 fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
5750 let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
5751 let mut lexer = Lexer::new(source);
5752
5753 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5754 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5755 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5756
5757 let heredoc = lexer.read_heredoc("EOF", false);
5758 assert_eq!(heredoc.content, "1\n2\n3\n");
5759 }
5760
5761 #[test]
5762 fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
5763 let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
5764 let mut lexer = Lexer::new(source);
5765
5766 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5767 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5768 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5769
5770 let heredoc = lexer.read_heredoc("EOF", false);
5771 assert_eq!(heredoc.content, "body\n");
5772 }
5773
/// The heredoc line carries a `# note \` comment ending in a backslash; the
/// content read for `EOF` is expected to be just `body\n`.
#[test]
fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
    let script = "cat <<EOF # note \\\nbody\nEOF\n";
    let mut lx = Lexer::new(script);

    // Tokens up to and including the heredoc delimiter word.
    for (kind, text) in [
        (TokenKind::Word, Some("cat")),
        (TokenKind::HereDoc, None),
        (TokenKind::Word, Some("EOF")),
    ] {
        assert_next_token(&mut lx, kind, text);
    }

    let body = lx.read_heredoc("EOF", false);
    assert_eq!(body.content, "body\n");
}
5786
/// Same scenario as the comment-backslash case, but inside a subshell: the
/// comment follows `)` directly. The heredoc content must be `body\n` and the
/// `)` must still be the next token after the heredoc is read.
#[test]
fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
    let script = "( cat <<EOF )# note \\\nbody\nEOF\n";
    let mut lx = Lexer::new(script);

    // Tokens up to and including the heredoc delimiter word.
    for (kind, text) in [
        (TokenKind::LeftParen, None),
        (TokenKind::Word, Some("cat")),
        (TokenKind::HereDoc, None),
        (TokenKind::Word, Some("EOF")),
    ] {
        assert_next_token(&mut lx, kind, text);
    }

    let body = lx.read_heredoc("EOF", false);
    assert_eq!(body.content, "body\n");

    assert_next_token(&mut lx, TokenKind::RightParen, None);
}
5802
/// With `cat <<EOF \` followed by a `| tac` line, the heredoc content is
/// expected to be `1\n`, and `|` / `tac` are the tokens lexed afterwards.
#[test]
fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
    let script = "cat <<EOF \\\n| tac\n1\nEOF\n";
    let mut lx = Lexer::new(script);

    // Tokens up to and including the heredoc delimiter word.
    for (kind, text) in [
        (TokenKind::Word, Some("cat")),
        (TokenKind::HereDoc, None),
        (TokenKind::Word, Some("EOF")),
    ] {
        assert_next_token(&mut lx, kind, text);
    }

    let body = lx.read_heredoc("EOF", false);
    assert_eq!(body.content, "1\n");

    // The continued command tail is lexed after the heredoc body.
    for (kind, text) in [(TokenKind::Pipe, None), (TokenKind::Word, Some("tac"))] {
        assert_next_token(&mut lx, kind, text);
    }
}
5818
/// After reading a heredoc whose opening line also has `> file.txt`, the
/// redirect, its target, the newline and the trailing comment must each have
/// spans that slice back to their own source text.
#[test]
fn test_read_heredoc_with_redirect_preserves_following_spans() {
    let script = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
    let mut lx = Lexer::new(script);

    for (kind, text) in [
        (TokenKind::Word, Some("cat")),
        (TokenKind::HereDoc, None),
        (TokenKind::Word, Some("EOF")),
    ] {
        assert_next_token(&mut lx, kind, text);
    }

    let body = lx.read_heredoc("EOF", false);
    assert_eq!(body.content, "hello\n");

    // `>` operator.
    let redirect_tok = lx.next_lexed_token_with_comments().unwrap();
    assert_eq!(redirect_tok.kind, TokenKind::RedirectOut);
    assert_eq!(redirect_tok.span.slice(script), ">");

    // Redirect target word.
    let target_tok = lx.next_lexed_token_with_comments().unwrap();
    assert_eq!(target_tok.kind, TokenKind::Word);
    let target_text = token_text(&target_tok, lx.input);
    assert_eq!(target_text.as_deref(), Some("file.txt"));
    assert_eq!(target_tok.span.slice(script), "file.txt");

    // Newline token that follows.
    let newline_tok = lx.next_lexed_token_with_comments().unwrap();
    assert_eq!(newline_tok.kind, TokenKind::Newline);
    assert_eq!(newline_tok.span.slice(script), "\n");

    // Trailing comment: the text excludes `#`, the span includes it.
    let comment_tok = lx.next_lexed_token_with_comments().unwrap();
    assert_eq!(comment_tok.kind, TokenKind::Comment);
    let comment_text = token_text(&comment_tok, lx.input);
    assert_eq!(comment_text.as_deref(), Some(" done"));
    assert_eq!(comment_tok.span.slice(script), "# done");
}
5852
/// A comment containing accented characters must yield the full comment text
/// and a span whose byte offsets land on UTF-8 character boundaries.
#[test]
fn test_comment_with_unicode() {
    let script = "# café résumé\necho ok";
    let mut lx = Lexer::new(script);

    let comment_tok = lx.next_lexed_token_with_comments().unwrap();
    assert_eq!(comment_tok.kind, TokenKind::Comment);
    let comment_text = token_text(&comment_tok, lx.input);
    assert_eq!(comment_text.as_deref(), Some(" café résumé"));

    let (start, end) = (comment_tok.span.start.offset, comment_tok.span.end.offset);
    assert_eq!(start, 0);
    assert_eq!(&script[start..end], "# café résumé");
    assert!(script.is_char_boundary(start));
    assert!(script.is_char_boundary(end));

    // Lexing continues normally after the multi-byte comment.
    for (kind, text) in [(TokenKind::Newline, None), (TokenKind::Word, Some("echo"))] {
        assert_next_token_with_comments(&mut lx, kind, text);
    }
}
5876
/// A comment made of CJK characters must yield the full comment text and a
/// span whose byte offsets land on UTF-8 character boundaries.
#[test]
fn test_comment_with_cjk_characters() {
    let script = "# 你好世界\necho ok";
    let mut lx = Lexer::new(script);

    let comment_tok = lx.next_lexed_token_with_comments().unwrap();
    assert_eq!(comment_tok.kind, TokenKind::Comment);
    let comment_text = token_text(&comment_tok, lx.input);
    assert_eq!(comment_text.as_deref(), Some(" 你好世界"));

    let (start, end) = (comment_tok.span.start.offset, comment_tok.span.end.offset);
    assert_eq!(&script[start..end], "# 你好世界");
    assert!(script.is_char_boundary(start));
    assert!(script.is_char_boundary(end));
}
5895
/// A `#` line inside a heredoc body is part of the content, while a `#` line
/// after the heredoc is lexed as a `Comment` token.
#[test]
fn test_heredoc_with_comments_inside() {
    let script = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
    let mut lx = Lexer::new(script);

    for (kind, text) in [
        (TokenKind::Word, Some("cat")),
        (TokenKind::HereDoc, None),
        (TokenKind::Word, Some("EOF")),
    ] {
        assert_next_token_with_comments(&mut lx, kind, text);
    }

    let body = lx.read_heredoc("EOF", false);
    assert_eq!(body.content, "# not a comment\nreal line\n");

    assert_next_token_with_comments(&mut lx, TokenKind::Newline, None);

    let comment_tok = lx.next_lexed_token_with_comments().unwrap();
    assert_eq!(comment_tok.kind, TokenKind::Comment);
    let comment_text = token_text(&comment_tok, lx.input);
    assert_eq!(comment_text.as_deref(), Some(" real comment"));
}
5919
/// A `#` inside `${x#prefix}` in a heredoc body must be kept verbatim in the
/// heredoc content.
#[test]
fn test_heredoc_with_hash_in_variable() {
    let script = "cat <<EOF\nval=${x#prefix}\nEOF\n";
    let mut lx = Lexer::new(script);

    for (kind, text) in [
        (TokenKind::Word, Some("cat")),
        (TokenKind::HereDoc, None),
        (TokenKind::Word, Some("EOF")),
    ] {
        assert_next_token_with_comments(&mut lx, kind, text);
    }

    let body = lx.read_heredoc("EOF", false);
    assert_eq!(body.content, "val=${x#prefix}\n");
}
5933
/// The heredoc content span must stay within the source and slice back to
/// exactly the body text; lexing then resumes with the command after it.
#[test]
fn test_heredoc_span_does_not_leak() {
    let script = "cat <<EOF\nhello\nworld\nEOF\necho after";
    let mut lx = Lexer::new(script);

    for (kind, text) in [
        (TokenKind::Word, Some("cat")),
        (TokenKind::HereDoc, None),
        (TokenKind::Word, Some("EOF")),
    ] {
        assert_next_token(&mut lx, kind, text);
    }

    let body = lx.read_heredoc("EOF", false);
    // `end` keeps its name because the assertion message captures it.
    let (start, end) = (body.content_span.start.offset, body.content_span.end.offset);
    assert!(
        end <= script.len(),
        "heredoc span end ({end}) exceeds source length ({})",
        script.len()
    );
    assert_eq!(&script[start..end], "hello\nworld\n");

    for (kind, text) in [
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some("after")),
    ] {
        assert_next_token(&mut lx, kind, text);
    }
}
5960
/// After a heredoc with an escaped delimiter (`<<\_ACEOF`) whose body contains
/// an unbalanced backtick, the two following assignment words must still have
/// spans and segments that match their source text.
#[test]
fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
    let source = concat!(
        "cat <<\\_ACEOF\n",
        "Use these variables to override the choices made by `configure' or to help\n",
        "it to find libraries and programs with nonstandard names/locations.\n",
        "_ACEOF\n",
        "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`\n",
        "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`\n",
    );
    let mut lx = Lexer::new(source);

    for (kind, text) in [(TokenKind::Word, Some("cat")), (TokenKind::HereDoc, None)] {
        assert_next_token_with_comments(&mut lx, kind, text);
    }
    let delimiter_tok = lx.next_lexed_token_with_comments().unwrap();
    assert_eq!(delimiter_tok.kind, TokenKind::Word);
    assert_eq!(delimiter_tok.span.slice(source), "\\_ACEOF");

    let body = lx.read_heredoc("_ACEOF", false);
    assert_eq!(
        body.content,
        "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
    );

    assert_next_token_with_comments(&mut lx, TokenKind::Newline, None);

    // First assignment: plain prefix followed by a backtick substitution.
    let first_backticks = "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`";
    let first_word = lx.next_lexed_token_with_comments().unwrap();
    assert_eq!(first_word.kind, TokenKind::Word);
    assert_eq!(
        first_word.span.slice(source),
        "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
    );
    let first_parts: Vec<_> = first_word
        .word()
        .unwrap()
        .segments()
        .map(|part| {
            let kind = part.kind();
            let text = part.as_str().to_string();
            let raw = part.span().map(|sp| sp.slice(source).to_string());
            (kind, text, raw)
        })
        .collect();
    let expected_first = vec![
        (
            LexedWordSegmentKind::Plain,
            "ac_dir_suffix=/".to_string(),
            Some("ac_dir_suffix=/".to_string()),
        ),
        (
            LexedWordSegmentKind::Plain,
            first_backticks.to_string(),
            Some(first_backticks.to_string()),
        ),
    ];
    assert_eq!(first_parts, expected_first);

    assert_next_token_with_comments(&mut lx, TokenKind::Newline, None);

    // Second assignment: same shape, different substitution body.
    let second_backticks = "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`";
    let second_word = lx.next_lexed_token_with_comments().unwrap();
    assert_eq!(second_word.kind, TokenKind::Word);
    assert_eq!(
        second_word.span.slice(source),
        "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
    );
    let second_parts: Vec<_> = second_word
        .word()
        .unwrap()
        .segments()
        .map(|part| {
            let kind = part.kind();
            let text = part.as_str().to_string();
            let raw = part.span().map(|sp| sp.slice(source).to_string());
            (kind, text, raw)
        })
        .collect();
    let expected_second = vec![
        (
            LexedWordSegmentKind::Plain,
            "ac_top_builddir_sub=".to_string(),
            Some("ac_top_builddir_sub=".to_string()),
        ),
        (
            LexedWordSegmentKind::Plain,
            second_backticks.to_string(),
            Some(second_backticks.to_string()),
        ),
    ];
    assert_eq!(second_parts, expected_second);
}
6060
/// A heredoc body with multi-byte characters must be returned verbatim, and
/// its content span must start and end on UTF-8 character boundaries.
#[test]
fn test_heredoc_with_unicode_content() {
    let script = "cat <<EOF\n# 你好\ncafé\nEOF\n";
    let mut lx = Lexer::new(script);

    for (kind, text) in [
        (TokenKind::Word, Some("cat")),
        (TokenKind::HereDoc, None),
        (TokenKind::Word, Some("EOF")),
    ] {
        assert_next_token(&mut lx, kind, text);
    }

    let body = lx.read_heredoc("EOF", false);
    assert_eq!(body.content, "# 你好\ncafé\n");

    // `start` / `end` keep their names because the messages capture them.
    let (start, end) = (body.content_span.start.offset, body.content_span.end.offset);
    assert!(
        script.is_char_boundary(start),
        "heredoc span start ({start}) not on char boundary"
    );
    assert!(
        script.is_char_boundary(end),
        "heredoc span end ({end}) not on char boundary"
    );
    assert_eq!(&script[start..end], "# 你好\ncafé\n");
}
6085
/// An associative-array compound assignment is expected to lex as a single
/// `Word` token covering the whole input.
#[test]
fn test_assoc_compound_assignment() {
    let assignment = r#"m=([foo]="bar" [baz]="qux")"#;
    let mut lx = Lexer::new(assignment);

    assert_next_token(&mut lx, TokenKind::Word, Some(assignment));
    assert!(lx.next_lexed_token().is_none());
}
6098
/// An escaped character in the assignment name (`foo\_bar`) must not break
/// the compound assignment apart: one `Word` token spans the whole input.
#[test]
fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
    let input = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
    let mut lx = Lexer::new(input);

    let only_tok = lx.next_lexed_token().unwrap();
    assert_eq!(only_tok.kind, TokenKind::Word);
    let covered = only_tok.span.slice(input);
    assert_eq!(covered, input);

    assert!(lx.next_lexed_token().is_none());
}
6109
/// An `@(...)` group following an escaped character must stay attached to
/// the word: one `Word` token spans the whole input.
#[test]
fn test_extglob_after_escaped_literal_keeps_suffix_group() {
    let input = r#"foo\_bar@(baz|qux)"#;
    let mut lx = Lexer::new(input);

    let only_tok = lx.next_lexed_token().unwrap();
    assert_eq!(only_tok.kind, TokenKind::Word);
    let covered = only_tok.span.slice(input);
    assert_eq!(covered, input);

    assert!(lx.next_lexed_token().is_none());
}
6120
/// A plain indexed-array assignment is lexed as the `arr=` word followed by a
/// separate `(` token rather than one combined word.
#[test]
fn test_indexed_array_not_collapsed() {
    let mut lx = Lexer::new(r#"arr=("hello world")"#);

    for (kind, text) in [(TokenKind::Word, Some("arr=")), (TokenKind::LeftParen, None)] {
        assert_next_token(&mut lx, kind, text);
    }
}
6129
/// Inside an array literal, `"$plugin_dir"/*(:t)` must be one `Word` token
/// made of a double-quoted segment and two plain segments (`/*`, `(:t)`).
#[test]
fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
    let input = r#"plugins=( "$plugin_dir"/*(:t) )"#;
    let mut lx = Lexer::new(input);

    for (kind, text) in [(TokenKind::Word, Some("plugins=")), (TokenKind::LeftParen, None)] {
        assert_next_token(&mut lx, kind, text);
    }

    let element = lx.next_lexed_token().unwrap();
    assert_eq!(element.kind, TokenKind::Word);
    assert_eq!(element.span.slice(input), r#""$plugin_dir"/*(:t)"#);

    let lexed_word = element.word().unwrap();
    let parts = lexed_word
        .segments()
        .map(|part| (part.kind(), part.as_str().to_string()))
        .collect::<Vec<_>>();
    let expected = vec![
        (LexedWordSegmentKind::DoubleQuoted, "$plugin_dir".to_string()),
        (LexedWordSegmentKind::Plain, "/*".to_string()),
        (LexedWordSegmentKind::Plain, "(:t)".to_string()),
    ];
    assert_eq!(parts, expected);

    assert_next_token(&mut lx, TokenKind::RightParen, None);
    assert!(lx.next_lexed_token().is_none());
}
6162
/// Inside an array literal, `"$__GREP_CACHE_FILE"(Nm-1)` must be one `Word`
/// token made of a double-quoted segment followed by a plain `(Nm-1)` segment.
#[test]
fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
    let input = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
    let mut lx = Lexer::new(input);

    for (kind, text) in [
        (TokenKind::Word, Some("__GREP_ALIAS_CACHES=")),
        (TokenKind::LeftParen, None),
    ] {
        assert_next_token(&mut lx, kind, text);
    }

    let element = lx.next_lexed_token().unwrap();
    assert_eq!(element.kind, TokenKind::Word);
    assert_eq!(element.span.slice(input), r#""$__GREP_CACHE_FILE"(Nm-1)"#);

    let lexed_word = element.word().unwrap();
    let parts = lexed_word
        .segments()
        .map(|part| (part.kind(), part.as_str().to_string()))
        .collect::<Vec<_>>();
    let expected = vec![
        (
            LexedWordSegmentKind::DoubleQuoted,
            "$__GREP_CACHE_FILE".to_string(),
        ),
        (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
    ];
    assert_eq!(parts, expected);

    assert_next_token(&mut lx, TokenKind::RightParen, None);
    assert!(lx.next_lexed_token().is_none());
}
6194
/// `$dir/${~pats}(N)` must lex as a single `Word` token spanning the whole
/// input, with nothing left over.
#[test]
fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
    let input = r#"$dir/${~pats}(N)"#;
    let mut lx = Lexer::new(input);

    let only_tok = lx.next_lexed_token().unwrap();
    assert_eq!(only_tok.kind, TokenKind::Word);
    let covered = only_tok.span.slice(input);
    assert_eq!(covered, input);

    assert!(lx.next_lexed_token().is_none());
}
6205
/// In `foo$x()`, the `()` must be lexed as separate paren tokens rather than
/// being folded into the preceding word.
#[test]
fn test_dollar_word_does_not_absorb_function_parens() {
    let mut lx = Lexer::new(r#"foo$x()"#);

    for (kind, text) in [
        (TokenKind::Word, Some("foo$x")),
        (TokenKind::LeftParen, None),
        (TokenKind::RightParen, None),
    ] {
        assert_next_token(&mut lx, kind, text);
    }
    assert!(lx.next_lexed_token().is_none());
}
6215
/// In `foo-$(echo hi)()`, the trailing `()` must be lexed as separate paren
/// tokens rather than being folded into the word.
#[test]
fn test_command_substitution_word_does_not_absorb_function_parens() {
    let mut lx = Lexer::new(r#"foo-$(echo hi)()"#);

    for (kind, text) in [
        (TokenKind::Word, Some("foo-$(echo hi)")),
        (TokenKind::LeftParen, None),
        (TokenKind::RightParen, None),
    ] {
        assert_next_token(&mut lx, kind, text);
    }
    assert!(lx.next_lexed_token().is_none());
}
6225
/// A lone digit as the entire input must produce a token (and, per the test
/// name, must not panic while doing so).
#[test]
fn test_digit_at_eof_no_panic() {
    let mut lx = Lexer::new("2");
    assert!(lx.next_lexed_token().is_some());
}
6235
/// A `${...}` expansion nested inside another one's subscript must lex as a
/// single `Word` token covering the whole input.
#[test]
fn test_nested_brace_expansion_single_token() {
    let expansion = "${arr[${#arr[@]} - 1]}";
    let mut lx = Lexer::new(expansion);

    assert_next_token(&mut lx, TokenKind::Word, Some(expansion));
    assert!(lx.next_lexed_token().is_none());
}
6245
/// A simple `${foo}` expansion lexes as a single `Word` token with nothing
/// following it.
#[test]
fn test_simple_brace_expansion_unchanged() {
    let expansion = "${foo}";
    let mut lx = Lexer::new(expansion);

    assert_next_token(&mut lx, TokenKind::Word, Some(expansion));
    assert!(lx.next_lexed_token().is_none());
}
6253
/// Drains the lexer over the bundled `nvm.sh` fixture, failing if it emits
/// 100 000 or more tokens (treated as a stall) or no tokens at all.
#[test]
fn test_nvm_fixture_lexes_without_stalling() {
    let fixture = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
    let mut lx = Lexer::new(fixture);
    let mut produced = 0usize;

    loop {
        if lx.next_lexed_token().is_none() {
            break;
        }
        produced += 1;
        // Upper bound guards against a lexer that never reaches end of input.
        assert!(
            produced < 100_000,
            "lexer should continue making progress on the nvm fixture"
        );
    }

    assert!(produced > 0, "nvm fixture should produce at least one token");
}
6270
/// Case arms containing `${var//' '/%20}` inside double quotes must not let
/// any token span multiple lines, and the `;;` and `esac` tokens must still
/// be produced.
#[test]
fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
    let input = concat!(
        "case \"${_input_type:-}\" in\n",
        "  html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
        "  org) _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
        "esac\n",
    );

    assert_non_newline_tokens_stay_on_one_line(input);

    // Collect every token as a (kind, text) pair.
    let mut lx = Lexer::new(input);
    let mut seen = Vec::new();
    while let Some(tok) = lx.next_lexed_token() {
        seen.push((tok.kind, token_text(&tok, input)));
    }

    assert!(seen.contains(&(TokenKind::DoubleSemicolon, None)));
    assert!(seen.contains(&(TokenKind::Word, Some("esac".to_string()))));
}
6289
/// The `;|` case-arm terminator must be lexed as one `SemiPipe` token, never
/// as a separate `Semicolon` and `Pipe`.
#[test]
fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
    let input = concat!(
        "case $2 in\n",
        "  cygwin*) bin='cygwin32/bin' ;|\n",
        "esac\n",
    );

    // Collect every token as a (kind, text) pair.
    let mut lx = Lexer::new(input);
    let mut seen = Vec::new();
    while let Some(tok) = lx.next_lexed_token() {
        seen.push((tok.kind, token_text(&tok, input)));
    }

    assert!(seen.contains(&(TokenKind::SemiPipe, None)));
    assert!(!seen.contains(&(TokenKind::Semicolon, None)));
    assert!(!seen.contains(&(TokenKind::Pipe, None)));
}
6307
/// An inline `if`/`elif` using `+=(` and `+="..."` appends is run through the
/// shared helper that checks non-newline tokens stay on a single line.
#[test]
fn test_inline_if_with_array_append_stays_line_local() {
    let script = concat!(
        "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
        "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
    );

    assert_non_newline_tokens_stay_on_one_line(script);
}
6317
/// Under the native zsh profile, after `unsetopt interactive_comments` a
/// `#literal` line is expected to lex as a `Word`, not a comment.
#[test]
fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
    let script = "unsetopt interactive_comments\n#literal\n";
    let zsh = ShellProfile::native(crate::parser::ShellDialect::Zsh);
    let mut lx = Lexer::with_profile(script, &zsh);

    for (kind, text) in [
        (TokenKind::Word, Some("unsetopt")),
        (TokenKind::Word, Some("interactive_comments")),
        (TokenKind::Newline, None),
    ] {
        assert_next_token(&mut lx, kind, text);
    }
    // Comment-aware helper: a real comment would surface here as `Comment`.
    assert_next_token_with_comments(&mut lx, TokenKind::Word, Some("#literal"));
}
6329
/// Under the native zsh profile, after `setopt rc_quotes` the word `'a''b'`
/// is expected to lex as one `LiteralWord` with text `a'b`.
#[test]
fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
    let script = "setopt rc_quotes\nprint 'a''b'\n";
    let zsh = ShellProfile::native(crate::parser::ShellDialect::Zsh);
    let mut lx = Lexer::with_profile(script, &zsh);

    for (kind, text) in [
        (TokenKind::Word, Some("setopt")),
        (TokenKind::Word, Some("rc_quotes")),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("print")),
        (TokenKind::LiteralWord, Some("a'b")),
    ] {
        assert_next_token(&mut lx, kind, text);
    }
}
6342
/// Under the native zsh profile, after `setopt ignore_braces` the `{` and `}`
/// on the next line are expected to lex as ordinary `Word` tokens.
#[test]
fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
    let script = "setopt ignore_braces\n{ echo }\n";
    let zsh = ShellProfile::native(crate::parser::ShellDialect::Zsh);
    let mut lx = Lexer::with_profile(script, &zsh);

    for (kind, text) in [
        (TokenKind::Word, Some("setopt")),
        (TokenKind::Word, Some("ignore_braces")),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("{")),
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some("}")),
    ] {
        assert_next_token(&mut lx, kind, text);
    }
}
6356
/// Regression input (a crash found by fuzzing, per the test name): the bytes
/// are wrapped in `echo $((...))` and parsed. The parse result is discarded;
/// the test only requires that parsing returns.
#[test]
fn test_heredoc_in_arithmetic_fuzz_crash() {
    // Raw reproducer bytes; must remain valid UTF-8 for the conversion below.
    const CRASH_BYTES: &[u8] = &[
        35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
        40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
        33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
        119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
        0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
        119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
        122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
        122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
        122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
        49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
        119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
        122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
        61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
        32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
        122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
        39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
        122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
        32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
        0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
    ];

    // `input` keeps its name because the format string captures it.
    let input = std::str::from_utf8(CRASH_BYTES).unwrap();
    let script = format!("echo $(({input}))\n");

    // Only returning from the parse matters; the outcome is ignored.
    let _ = crate::parser::Parser::new(&script).parse();
}
6388}