1use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact bit-set of per-token properties, packed into a single byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Set when any word segment carries cooked (owned, rewritten) text.
    const COOKED_TEXT: u8 = 1 << 0;
    /// Set when the token was injected by the lexer rather than read from source.
    const SYNTHETIC: u8 = 1 << 1;

    /// Flag set with no bits set.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set with only the cooked-text bit set.
    const fn cooked_text() -> Self {
        Self(Self::COOKED_TEXT)
    }

    /// Returns a copy of `self` with the synthetic bit turned on.
    pub(crate) const fn with_synthetic(self) -> Self {
        Self(self.0 | Self::SYNTHETIC)
    }

    /// True when the cooked-text bit is set.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.0 & Self::COOKED_TEXT == Self::COOKED_TEXT
    }

    /// True when the synthetic bit is set.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.0 & Self::SYNTHETIC == Self::SYNTHETIC
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43 Borrowed(&'a str),
44 Shared {
45 source: Arc<str>,
46 range: Range<usize>,
47 },
48 Owned(String),
49}
50
51impl TokenText<'_> {
52 pub(crate) fn as_str(&self) -> &str {
53 match self {
54 Self::Borrowed(text) => text,
55 Self::Shared { source, range } => &source[range.clone()],
56 Self::Owned(text) => text,
57 }
58 }
59
60 fn into_owned<'a>(self) -> TokenText<'a> {
61 match self {
62 Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63 Self::Shared { source, range } => TokenText::Shared { source, range },
64 Self::Owned(text) => TokenText::Owned(text),
65 }
66 }
67
68 fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69 match self {
70 Self::Borrowed(text) => span
71 .filter(|span| span.end.offset <= source.len())
72 .map_or_else(
73 || TokenText::Owned(text.to_string()),
74 |span| TokenText::Shared {
75 source: Arc::clone(source),
76 range: span.start.offset..span.end.offset,
77 },
78 ),
79 Self::Shared { source, range } => TokenText::Shared { source, range },
80 Self::Owned(text) => TokenText::Owned(text),
81 }
82 }
83}
84
/// Classifies the quoting style a word segment was produced from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexedWordSegmentKind {
    /// Unquoted text (used for `TokenKind::Word` payloads).
    Plain,
    /// Single-quoted text (used for `TokenKind::LiteralWord` payloads).
    SingleQuoted,
    /// `$'...'`-style segment (per the variant name — set by readers outside
    /// this chunk).
    DollarSingleQuoted,
    /// Double-quoted text (used for `TokenKind::QuotedWord` payloads).
    DoubleQuoted,
    /// `$"..."`-style segment (per the variant name — set by readers outside
    /// this chunk).
    DollarDoubleQuoted,
    /// Fallback for any other token kind's word payload.
    Composite,
}
101
/// One quoting-uniform piece of a lexed word.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWordSegment<'a> {
    /// How this piece was quoted.
    kind: LexedWordSegmentKind,
    /// The segment's text (borrowed, shared, or cooked).
    text: TokenText<'a>,
    /// Span of the segment text in the source, when known.
    span: Option<Span>,
    /// Span including any surrounding quote characters; `wrapper_span()`
    /// falls back to `span` when this is `None`.
    wrapper_span: Option<Span>,
}
110
111impl<'a> LexedWordSegment<'a> {
112 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113 Self {
114 kind,
115 text: TokenText::Borrowed(text),
116 span,
117 wrapper_span: span,
118 }
119 }
120
121 fn borrowed_with_spans(
122 kind: LexedWordSegmentKind,
123 text: &'a str,
124 span: Option<Span>,
125 wrapper_span: Option<Span>,
126 ) -> Self {
127 Self {
128 kind,
129 text: TokenText::Borrowed(text),
130 span,
131 wrapper_span,
132 }
133 }
134
135 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136 Self {
137 kind,
138 text: TokenText::Owned(text),
139 span: None,
140 wrapper_span: None,
141 }
142 }
143
144 fn owned_with_spans(
145 kind: LexedWordSegmentKind,
146 text: String,
147 span: Option<Span>,
148 wrapper_span: Option<Span>,
149 ) -> Self {
150 Self {
151 kind,
152 text: TokenText::Owned(text),
153 span,
154 wrapper_span,
155 }
156 }
157
158 pub fn as_str(&self) -> &str {
160 self.text.as_str()
161 }
162
163 pub(crate) const fn text_is_source_backed(&self) -> bool {
164 matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165 }
166
167 pub const fn kind(&self) -> LexedWordSegmentKind {
169 self.kind
170 }
171
172 pub const fn span(&self) -> Option<Span> {
174 self.span
175 }
176
177 pub fn wrapper_span(&self) -> Option<Span> {
179 self.wrapper_span.or(self.span)
180 }
181
182 fn rebased(mut self, base: Position) -> Self {
183 self.span = self.span.map(|span| span.rebased(base));
184 self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185 self
186 }
187
188 fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189 LexedWordSegment {
190 kind: self.kind,
191 text: self.text.into_owned(),
192 span: self.span,
193 wrapper_span: self.wrapper_span,
194 }
195 }
196
197 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198 LexedWordSegment {
199 kind: self.kind,
200 text: self.text.into_shared(source, self.span),
201 span: self.span,
202 wrapper_span: self.wrapper_span,
203 }
204 }
205}
206
/// A full word as produced by the lexer: one primary segment plus any
/// trailing segments appended via `push_segment`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWord<'a> {
    /// The first segment; always present.
    primary_segment: LexedWordSegment<'a>,
    /// Additional segments following the primary one, in order.
    trailing_segments: Vec<LexedWordSegment<'a>>,
}
213
214impl<'a> LexedWord<'a> {
215 fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216 Self {
217 primary_segment,
218 trailing_segments: Vec::new(),
219 }
220 }
221
222 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223 Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224 }
225
226 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227 Self::from_segment(LexedWordSegment::owned(kind, text))
228 }
229
230 fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231 self.trailing_segments.push(segment);
232 }
233
234 pub fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236 std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237 }
238
239 pub fn text(&self) -> Option<&str> {
241 self.single_segment().map(LexedWordSegment::as_str)
242 }
243
244 pub fn joined_text(&self) -> String {
246 let mut text = String::new();
247 for segment in self.segments() {
248 text.push_str(segment.as_str());
249 }
250 text
251 }
252
253 pub fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255 self.trailing_segments
256 .is_empty()
257 .then_some(&self.primary_segment)
258 }
259
260 fn has_cooked_text(&self) -> bool {
261 self.segments()
262 .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263 }
264
265 fn rebased(mut self, base: Position) -> Self {
266 self.primary_segment = self.primary_segment.rebased(base);
267 self.trailing_segments = self
268 .trailing_segments
269 .into_iter()
270 .map(|segment| segment.rebased(base))
271 .collect();
272 self
273 }
274
275 fn into_owned<'b>(self) -> LexedWord<'b> {
276 LexedWord {
277 primary_segment: self.primary_segment.into_owned(),
278 trailing_segments: self
279 .trailing_segments
280 .into_iter()
281 .map(LexedWordSegment::into_owned)
282 .collect(),
283 }
284 }
285
286 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287 LexedWord {
288 primary_segment: self.primary_segment.into_shared(source),
289 trailing_segments: self
290 .trailing_segments
291 .into_iter()
292 .map(|segment| segment.into_shared(source))
293 .collect(),
294 }
295 }
296}
297
/// The specific unterminated-construct condition carried by a
/// `TokenKind::Error` token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    /// Unterminated command substitution.
    CommandSubstitution,
    /// Unterminated backtick substitution.
    BacktickSubstitution,
    /// Unterminated single quote.
    SingleQuote,
    /// Unterminated double quote.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Human-readable description of the error condition.
    pub const fn message(self) -> &'static str {
        match self {
            Self::SingleQuote => "unterminated single quote",
            Self::DoubleQuote => "unterminated double quote",
            Self::CommandSubstitution => "unterminated command substitution",
            Self::BacktickSubstitution => "unterminated backtick substitution",
        }
    }
}
322
/// Extra data attached to a `LexedToken`, varying by token kind.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// Punctuation/operator tokens carry no payload.
    None,
    /// Word-like tokens carry the lexed word.
    Word(LexedWord<'a>),
    /// A single file-descriptor value.
    Fd(i32),
    /// A (source, destination) file-descriptor pair — see `fd_pair_value`.
    FdPair(i32, i32),
    /// Error tokens carry the specific error condition.
    Error(LexerErrorKind),
}
331
/// A single token produced by the lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedToken<'a> {
    /// What kind of token this is.
    pub kind: TokenKind,
    /// Where the token sits in the source.
    pub span: Span,
    /// Cooked-text / synthetic property bits.
    pub(crate) flags: TokenFlags,
    /// Kind-specific extra data.
    payload: TokenPayload<'a>,
}
342
343impl<'a> LexedToken<'a> {
344 fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
345 match kind {
346 TokenKind::Word => LexedWordSegmentKind::Plain,
347 TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
348 TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
349 _ => LexedWordSegmentKind::Composite,
350 }
351 }
352
353 pub(crate) fn punctuation(kind: TokenKind) -> Self {
354 Self {
355 kind,
356 span: Span::new(),
357 flags: TokenFlags::empty(),
358 payload: TokenPayload::None,
359 }
360 }
361
362 fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
363 let flags = if word.has_cooked_text() {
364 TokenFlags::cooked_text()
365 } else {
366 TokenFlags::empty()
367 };
368
369 Self {
370 kind,
371 span: Span::new(),
372 flags,
373 payload: TokenPayload::Word(word),
374 }
375 }
376
377 fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
378 Self::with_word_payload(
379 kind,
380 LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
381 )
382 }
383
384 fn owned_word(kind: TokenKind, text: String) -> Self {
385 Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
386 }
387
388 fn comment() -> Self {
389 Self {
390 kind: TokenKind::Comment,
391 span: Span::new(),
392 flags: TokenFlags::empty(),
393 payload: TokenPayload::None,
394 }
395 }
396
397 fn fd(kind: TokenKind, fd: i32) -> Self {
398 Self {
399 kind,
400 span: Span::new(),
401 flags: TokenFlags::empty(),
402 payload: TokenPayload::Fd(fd),
403 }
404 }
405
406 fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
407 Self {
408 kind,
409 span: Span::new(),
410 flags: TokenFlags::empty(),
411 payload: TokenPayload::FdPair(src_fd, dst_fd),
412 }
413 }
414
415 fn error(kind: LexerErrorKind) -> Self {
416 Self {
417 kind: TokenKind::Error,
418 span: Span::new(),
419 flags: TokenFlags::empty(),
420 payload: TokenPayload::Error(kind),
421 }
422 }
423
424 pub(crate) fn with_span(mut self, span: Span) -> Self {
425 self.span = span;
426 self
427 }
428
429 pub(crate) fn rebased(mut self, base: Position) -> Self {
430 self.span = self.span.rebased(base);
431 self.payload = match self.payload {
432 TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
433 payload => payload,
434 };
435 self
436 }
437
438 pub(crate) fn with_synthetic_flag(mut self) -> Self {
439 self.flags = self.flags.with_synthetic();
440 self
441 }
442
443 pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
444 let payload = match self.payload {
445 TokenPayload::None => TokenPayload::None,
446 TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
447 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
448 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
449 TokenPayload::Error(kind) => TokenPayload::Error(kind),
450 };
451
452 LexedToken {
453 kind: self.kind,
454 span: self.span,
455 flags: self.flags,
456 payload,
457 }
458 }
459
460 pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
461 let payload = match self.payload {
462 TokenPayload::None => TokenPayload::None,
463 TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
464 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
465 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
466 TokenPayload::Error(kind) => TokenPayload::Error(kind),
467 };
468
469 LexedToken {
470 kind: self.kind,
471 span: self.span,
472 flags: self.flags,
473 payload,
474 }
475 }
476
477 pub fn word_text(&self) -> Option<&str> {
479 self.kind
480 .is_word_like()
481 .then_some(())
482 .and_then(|_| match &self.payload {
483 TokenPayload::Word(word) => word.text(),
484 _ => None,
485 })
486 }
487
488 pub fn word_string(&self) -> Option<String> {
490 self.kind
491 .is_word_like()
492 .then_some(())
493 .and_then(|_| match &self.payload {
494 TokenPayload::Word(word) => Some(word.joined_text()),
495 _ => None,
496 })
497 }
498
499 pub fn word(&self) -> Option<&LexedWord<'a>> {
501 match &self.payload {
502 TokenPayload::Word(word) => Some(word),
503 _ => None,
504 }
505 }
506
507 pub fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
509 if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
510 return None;
511 }
512
513 (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
514 .then(|| &source[self.span.start.offset..self.span.end.offset])
515 }
516
517 pub fn fd_value(&self) -> Option<i32> {
519 match self.payload {
520 TokenPayload::Fd(fd) => Some(fd),
521 _ => None,
522 }
523 }
524
525 pub fn fd_pair_value(&self) -> Option<(i32, i32)> {
527 match self.payload {
528 TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
529 _ => None,
530 }
531 }
532
533 pub fn error_kind(&self) -> Option<LexerErrorKind> {
535 match self.payload {
536 TokenPayload::Error(kind) => Some(kind),
537 _ => None,
538 }
539 }
540}
541
/// The result of reading a heredoc body.
#[derive(Debug, Clone, PartialEq)]
pub struct HeredocRead {
    /// The heredoc body text.
    pub content: String,
    /// Span of the body in the source.
    pub content_span: Span,
}
550
/// Default value for `Lexer::max_subst_depth` (see `with_max_subst_depth`).
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
554
555#[derive(Clone, Debug)]
556struct Cursor<'a> {
557 rest: &'a str,
558}
559
560impl<'a> Cursor<'a> {
561 fn new(source: &'a str) -> Self {
562 Self { rest: source }
563 }
564
565 fn first(&self) -> Option<char> {
566 self.rest.chars().next()
567 }
568
569 fn second(&self) -> Option<char> {
570 let mut chars = self.rest.chars();
571 chars.next()?;
572 chars.next()
573 }
574
575 fn third(&self) -> Option<char> {
576 let mut chars = self.rest.chars();
577 chars.next()?;
578 chars.next()?;
579 chars.next()
580 }
581
582 fn bump(&mut self) -> Option<char> {
583 let ch = self.first()?;
584 self.rest = &self.rest[ch.len_utf8()..];
585 Some(ch)
586 }
587
588 fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
589 let start = self.rest;
590 let mut end = 0;
591
592 for ch in start.chars() {
593 if !predicate(ch) {
594 break;
595 }
596 end += ch.len_utf8();
597 }
598
599 self.rest = &start[end..];
600 &start[..end]
601 }
602
603 fn rest(&self) -> &'a str {
604 self.rest
605 }
606
607 fn skip_bytes(&mut self, count: usize) {
608 self.rest = &self.rest[count..];
609 }
610
611 fn find_byte(&self, byte: u8) -> Option<usize> {
612 memchr(byte, self.rest.as_bytes())
613 }
614}
615
/// Converts byte offsets into `Position`s using a precomputed table of line
/// starts, with a cache of the most recently computed position.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    /// The full source text the offsets refer to.
    source: &'a str,
    /// Byte offset of the start of each line; entry 0 is offset 0.
    line_starts: Vec<usize>,
    /// Most recently computed position, reused for forward queries.
    cached: Position,
}
622
/// Counters collected while benchmarking the lexer (only compiled with the
/// `benchmarking` feature).
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of calls made to `Lexer::current_position`.
    pub(crate) current_position_calls: u64,
}
628
impl<'a> PositionMap<'a> {
    /// Builds the map, scanning `source` for newline offsets.
    fn new(source: &'a str) -> Self {
        // One entry per line: offset 0 plus the offset just past each '\n'.
        // Capacity is computed up front to avoid reallocation.
        let mut line_starts =
            Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
        line_starts.push(0);
        line_starts.extend(
            source
                .bytes()
                .enumerate()
                .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
        );

        Self {
            source,
            line_starts,
            cached: Position::new(),
        }
    }

    /// Position for `offset`, updating the cache. In-range forward queries
    /// advance incrementally from the cached position; backward or
    /// out-of-range queries fall back to the line-table lookup.
    fn position(&mut self, offset: usize) -> Position {
        if offset == self.cached.offset {
            return self.cached;
        }

        let position = if offset > self.cached.offset && offset <= self.source.len() {
            // Only walk the text between the cached offset and the target.
            Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
        } else {
            self.position_uncached(offset)
        };
        self.cached = position;
        position
    }

    /// Position for `offset` via binary search of the line table; does not
    /// touch the cache. Offsets past the end are clamped to the end.
    fn position_uncached(&self, offset: usize) -> Position {
        let offset = offset.min(self.source.len());
        let line_index = self
            .line_starts
            .partition_point(|start| *start <= offset)
            .saturating_sub(1);
        let line_start = self.line_starts[line_index];
        let line_text = &self.source[line_start..offset];
        // Columns are 1-based and counted in chars; the ASCII fast path
        // avoids the per-char walk.
        let column = if line_text.is_ascii() {
            line_text.len() + 1
        } else {
            line_text.chars().count() + 1
        };

        Position {
            line: line_index + 1,
            column,
            offset,
        }
    }

    /// Advances `position` across `text`, updating line, column, and offset.
    fn advance_from(mut position: Position, text: &str) -> Position {
        position.offset += text.len();
        let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
        if newline_count == 0 {
            // Same line: just widen the column (chars, with ASCII fast path).
            position.column += if text.is_ascii() {
                text.len()
            } else {
                text.chars().count()
            };
            return position;
        }

        position.line += newline_count;
        // The column restarts after the last newline in `text`.
        let tail_start = memrchr(b'\n', text.as_bytes())
            .map(|index| index + 1)
            .unwrap_or_default();
        let tail = &text[tail_start..];
        position.column = if tail.is_ascii() {
            tail.len() + 1
        } else {
            tail.chars().count() + 1
        };
        position
    }
}
708
/// Shell lexer over a borrowed source string.
#[derive(Clone)]
pub struct Lexer<'a> {
    /// The full input text, kept for span-based slicing.
    #[allow(dead_code)] input: &'a str,
    /// Current byte offset into `input`.
    offset: usize,
    /// Unconsumed tail of the input.
    cursor: Cursor<'a>,
    /// Offset-to-line/column translator.
    position_map: PositionMap<'a>,
    /// Characters pushed back for re-reading ahead of the cursor.
    reinject_buf: VecDeque<char>,
    /// Offset to restore once the reinject buffer drains
    /// (see `sync_offset_to_cursor`).
    reinject_resume_offset: Option<usize>,
    /// Substitution nesting depth limit (see `with_max_subst_depth`).
    max_subst_depth: usize,
    /// Zsh option state at the start of the input, when known.
    initial_zsh_options: Option<ZshOptionState>,
    /// Precomputed per-offset zsh option changes (zsh dialect only).
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    /// Index of the next unapplied timeline entry.
    zsh_timeline_index: usize,
    /// Optional counters (see `enable_benchmark_counters`).
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
731
732impl<'a> Lexer<'a> {
    /// Lexer with the default substitution depth and the native Bash profile.
    pub fn new(input: &'a str) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }

    /// Lexer with an explicit substitution depth cap (native Bash profile).
    pub fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            max_depth,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }

    /// Lexer configured for `shell_profile`. For the zsh dialect this
    /// pre-builds the option timeline so mid-source option changes can
    /// influence lexing.
    pub fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
            .then(|| ZshOptionTimeline::build(input, shell_profile))
            .flatten()
            .map(Arc::new);
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            shell_profile,
            zsh_timeline,
        )
    }

    /// Fully-parameterized constructor backing the public ones.
    pub(crate) fn with_max_subst_depth_and_profile(
        input: &'a str,
        max_depth: usize,
        shell_profile: &ShellProfile,
        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    ) -> Self {
        Self {
            input,
            offset: 0,
            cursor: Cursor::new(input),
            position_map: PositionMap::new(input),
            reinject_buf: VecDeque::new(),
            reinject_resume_offset: None,
            max_subst_depth: max_depth,
            initial_zsh_options: shell_profile.zsh_options().cloned(),
            zsh_timeline,
            zsh_timeline_index: 0,
            #[cfg(feature = "benchmarking")]
            benchmark_counters: None,
        }
    }
789
    /// Current position, computed without mutating the cache (usable through
    /// `&self`).
    pub fn position(&self) -> Position {
        self.position_map.position_uncached(self.offset)
    }

    /// Current position via the cached, incremental path.
    fn current_position(&mut self) -> Position {
        #[cfg(feature = "benchmarking")]
        self.maybe_record_current_position_call();
        self.position_map.position(self.offset)
    }

    /// Starts collecting benchmark counters.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn enable_benchmark_counters(&mut self) {
        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
    }

    /// Snapshot of the benchmark counters (zeroed when not enabled).
    #[cfg(feature = "benchmarking")]
    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
        self.benchmark_counters.unwrap_or_default()
    }

    /// Bumps the `current_position` call counter when counting is enabled.
    #[cfg(feature = "benchmarking")]
    fn maybe_record_current_position_call(&mut self) {
        if let Some(counters) = &mut self.benchmark_counters {
            counters.current_position_calls += 1;
        }
    }
817
    /// Once the reinject buffer drains, jumps `offset` back to where source
    /// consumption should resume.
    fn sync_offset_to_cursor(&mut self) {
        if self.reinject_buf.is_empty()
            && let Some(offset) = self.reinject_resume_offset.take()
        {
            self.offset = offset;
        }
    }

    /// Next token's kind, discarding the rest of the token.
    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
        self.next_lexed_token().map(|token| token.kind)
    }

    /// Peeks the next character (reinjected characters take priority).
    fn peek_char(&mut self) -> Option<char> {
        self.sync_offset_to_cursor();
        if let Some(&ch) = self.reinject_buf.front() {
            Some(ch)
        } else {
            self.cursor.first()
        }
    }

    /// Consumes and returns the next character, keeping `offset` in step.
    fn advance(&mut self) -> Option<char> {
        self.sync_offset_to_cursor();
        let ch = if !self.reinject_buf.is_empty() {
            self.reinject_buf.pop_front()
        } else {
            self.cursor.bump()
        };
        if let Some(c) = ch {
            self.offset += c.len_utf8();
        }
        ch
    }

    /// All upcoming characters: reinjected ones first, then the source tail.
    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
        self.reinject_buf
            .iter()
            .copied()
            .chain(self.cursor.rest().chars())
    }
860
    /// Second upcoming character, accounting for the reinject buffer: take
    /// buffered characters first, then fall through to the cursor.
    fn second_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.second(),
            1 => self.cursor.first(),
            _ => self.reinject_buf.get(1).copied(),
        }
    }

    /// Third upcoming character, accounting for the reinject buffer.
    fn third_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.third(),
            1 => self.cursor.second(),
            2 => self.cursor.first(),
            _ => self.reinject_buf.get(2).copied(),
        }
    }

    /// Fourth upcoming character, accounting for the reinject buffer.
    fn fourth_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.rest().chars().nth(3),
            1 => self.cursor.third(),
            2 => self.cursor.second(),
            3 => self.cursor.first(),
            _ => self.reinject_buf.get(3).copied(),
        }
    }
887
    /// Consumes `byte_len` bytes straight from the source. Only valid while
    /// no reinjected characters are pending.
    fn consume_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.sync_offset_to_cursor();
        self.offset += byte_len;
        self.cursor.skip_bytes(byte_len);
    }

    /// Accounts in `offset` for bytes a scan already advanced the cursor
    /// past.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }

    /// Consumes `count` ASCII characters, taking the byte fast path when no
    /// reinjected characters are pending.
    fn consume_ascii_chars(&mut self, count: usize) {
        if self.reinject_buf.is_empty() {
            self.consume_source_bytes(count);
            return;
        }

        for _ in 0..count {
            self.advance();
        }
    }

    /// Byte length of the run of spaces/tabs at the cursor.
    fn source_horizontal_whitespace_len(&self) -> usize {
        self.cursor
            .rest()
            .as_bytes()
            .iter()
            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
            .count()
    }

    /// Byte length of the run of plain ASCII word bytes at the cursor.
    fn source_ascii_plain_word_len(&self) -> usize {
        self.cursor
            .rest()
            .as_bytes()
            .iter()
            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
            .count()
    }
928
929 fn find_double_quote_special(source: &str) -> Option<usize> {
930 source
931 .as_bytes()
932 .iter()
933 .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
934 }
935
    /// Backfills `capture` with the raw source text between `start` and
    /// `end` if no cooked capture has been started yet.
    fn ensure_capture_from_source(
        &self,
        capture: &mut Option<String>,
        start: Position,
        end: Position,
    ) {
        if capture.is_none() {
            *capture = Some(self.input[start.offset..end.offset].to_string());
        }
    }
946
947 fn push_capture_char(capture: &mut Option<String>, ch: char) {
948 if let Some(text) = capture.as_mut() {
949 text.push(ch);
950 }
951 }
952
953 fn push_capture_str(capture: &mut Option<String>, text: &str) {
954 if let Some(current) = capture.as_mut() {
955 current.push_str(text);
956 }
957 }
958
    /// Zsh option state in effect at the current offset. Walks the timeline
    /// forward past entries at or before `offset`; before the first entry
    /// (or with no timeline at all) the initial options apply.
    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
        if let Some(timeline) = self.zsh_timeline.as_ref() {
            while self.zsh_timeline_index < timeline.entries.len()
                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
            {
                self.zsh_timeline_index += 1;
            }
            return if self.zsh_timeline_index == 0 {
                self.initial_zsh_options.as_ref()
            } else {
                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
            };
        }

        self.initial_zsh_options.as_ref()
    }

    /// Whether `#` may start a comment here: true unless zsh's
    /// `interactive_comments` option is definitely off.
    fn comments_enabled(&mut self) -> bool {
        !self
            .current_zsh_options()
            .is_some_and(|options| options.interactive_comments.is_definitely_off())
    }

    /// Whether zsh's `rc_quotes` option is definitely on.
    fn rc_quotes_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.rc_quotes.is_definitely_on())
    }

    /// Whether zsh's `ignore_braces` option is definitely on.
    fn ignore_braces_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.ignore_braces.is_definitely_on())
    }

    /// Whether `}` should be treated literally: `ignore_braces` or
    /// `ignore_close_braces` definitely on.
    fn ignore_close_braces_enabled(&mut self) -> bool {
        self.current_zsh_options().is_some_and(|options| {
            options.ignore_braces.is_definitely_on()
                || options.ignore_close_braces.is_definitely_on()
        })
    }
998
    /// Whether a `#` at the current offset belongs to a word instead of
    /// starting a comment: comments are disabled entirely, the `#` directly
    /// follows a non-separator character, or we are inside an unclosed `((`
    /// on this line.
    fn should_treat_hash_as_word_char(&mut self) -> bool {
        if !self.comments_enabled() {
            return true;
        }
        // Only the raw-source path can inspect the previous character; with
        // reinjected characters pending, treat `#` as a comment starter.
        self.reinject_buf.is_empty()
            && (self
                .input
                .get(..self.offset)
                .and_then(|prefix| prefix.chars().next_back())
                .is_some_and(|prev| {
                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
                })
                || self.is_inside_unclosed_double_paren_on_line())
    }
1013
    /// Text of the word currently being read: the cooked capture when one
    /// exists, otherwise the raw source from `start` to the current offset.
    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
        capture
            .as_deref()
            .unwrap_or(&self.input[start.offset..self.offset])
    }

    /// Whether the word read so far is exactly `target`, ignoring embedded
    /// `'\x00'` characters (apparently used as internal placeholders — see
    /// callers outside this chunk).
    fn current_word_surface_is_single_char(
        &self,
        start: Position,
        capture: &Option<String>,
        target: char,
    ) -> bool {
        let text = self.current_word_text(start, capture);
        // Fast path: no placeholders, so compare the text directly against
        // the UTF-8 encoding of `target`.
        if !text.contains('\x00') {
            let mut encoded = [0; 4];
            return text == target.encode_utf8(&mut encoded);
        }

        // Slow path: exactly one non-placeholder char, equal to `target`.
        let mut chars = text.chars().filter(|&ch| ch != '\x00');
        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
    }

    /// Last non-`'\x00'` character of the word read so far.
    fn current_word_surface_last_char<'b>(
        &'b self,
        start: Position,
        capture: &'b Option<String>,
    ) -> Option<char> {
        self.current_word_text(start, capture)
            .chars()
            .rev()
            .find(|&ch| ch != '\x00')
    }

    /// Whether the word read so far ends with `target` (ignoring `'\x00'`).
    fn current_word_surface_ends_with_char(
        &self,
        start: Position,
        capture: &Option<String>,
        target: char,
    ) -> bool {
        self.current_word_surface_last_char(start, capture) == Some(target)
    }

    /// Whether the word read so far ends with an extglob prefix character
    /// (`@`, `?`, `*`, `+`, `!`).
    fn current_word_surface_ends_with_extglob_prefix(
        &self,
        start: Position,
        capture: &Option<String>,
    ) -> bool {
        self.current_word_surface_last_char(start, capture)
            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
    }
1064
    /// Next token, skipping whitespace and comments. The token's span is
    /// measured and attached here.
    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
        self.skip_whitespace();
        let start = self.current_position();
        let token = self.next_lexed_token_inner(false)?;
        let end = self.current_position();
        Some(token.with_span(Span::from_positions(start, end)))
    }

    /// Next token, emitting `Comment` tokens instead of skipping them.
    pub fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
        self.skip_whitespace();
        let start = self.current_position();
        let token = self.next_lexed_token_inner(true)?;
        let end = self.current_position();
        Some(token.with_span(Span::from_positions(start, end)))
    }
1082
    /// Reads one token starting at the current character; `None` at end of
    /// input.
    ///
    /// Operators are matched longest-first using the fixed lookahead helpers
    /// (`second_char`/`third_char`); word-like input is delegated to the
    /// `read_*` helpers. `preserve_comments` selects whether `#` comments
    /// become `Comment` tokens or are skipped transparently.
    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
        let ch = self.peek_char()?;

        match ch {
            '\n' => {
                self.consume_ascii_chars(1);
                Some(LexedToken::punctuation(TokenKind::Newline))
            }
            // `;;&` / `;;` / `;|` / `;&` / `;`.
            ';' => {
                if self.second_char() == Some(';') {
                    if self.third_char() == Some('&') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiPipe))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiAmp))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Semicolon))
                }
            }
            // `||` / `|&` / `|`.
            '|' => {
                if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Or))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Pipe))
                }
            }
            // `&&` / `&>>` / `&>` / `&|` / `&!` / `&`.
            '&' => {
                if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::And))
                } else if self.second_char() == Some('>') {
                    if self.third_char() == Some('>') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
                } else if self.second_char() == Some('!') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Background))
                }
            }
            // `>>|` / `>>` / `>|` / `>(` / `>&` / `>`.
            '>' => {
                if self.second_char() == Some('>') {
                    // `>>|` is folded into a plain append redirect.
                    if self.third_char() == Some('|') {
                        self.consume_ascii_chars(3);
                    } else {
                        self.consume_ascii_chars(2);
                    }
                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Clobber))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupOutput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
                }
            }
            // `<<<` / `<<-` / `<<` / `<>` / `<(` / `<&` / `<`.
            '<' => {
                if self.second_char() == Some('<') {
                    if self.third_char() == Some('<') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereString))
                    } else if self.third_char() == Some('-') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::HereDoc))
                    }
                } else if self.second_char() == Some('>') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupInput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
                }
            }
            '(' => {
                if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::LeftParen))
                }
            }
            ')' => {
                if self.second_char() == Some(')') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RightParen))
                }
            }
            // `{` may be a brace group, a brace expansion, or a literal word
            // character, depending on lookahead and zsh options.
            '{' => {
                let start = self.current_position();
                if self.ignore_braces_enabled() {
                    // zsh ignore_braces: `{` is a plain word character.
                    self.consume_ascii_chars(1);
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
                        }
                        _ => self.read_word_starting_with("{", start),
                    }
                } else if self.looks_like_brace_expansion() {
                    self.read_brace_expansion_word()
                } else if self.is_brace_group_start() {
                    self.advance();
                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
                } else if self.brace_literal_starts_case_pattern_delimiter() {
                    self.read_word_starting_with("{", start)
                } else {
                    self.read_brace_literal_word()
                }
            }
            '}' => {
                self.consume_ascii_chars(1);
                if self.ignore_close_braces_enabled() {
                    // zsh ignore(_close)_braces: `}` is a plain word.
                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
                } else {
                    Some(LexedToken::punctuation(TokenKind::RightBrace))
                }
            }
            // `[[` followed by a separator opens a conditional expression;
            // otherwise `[` behaves like an ordinary word start.
            '[' => {
                let start = self.current_position();
                self.consume_ascii_chars(1);
                if self.peek_char() == Some('[')
                    && matches!(
                        self.second_char(),
                        Some(' ') | Some('\t') | Some('\n') | None
                    )
                {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
                } else {
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
                        }
                        _ => self.read_word_starting_with("[", start),
                    }
                }
            }
            ']' => {
                if self.second_char() == Some(']') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
                }
            }
            '\'' => self.read_single_quoted_string(),
            '"' => self.read_double_quoted_string(),
            '#' => {
                // `#` mid-word (or with comments disabled) stays literal.
                if self.should_treat_hash_as_word_char() {
                    let start = self.current_position();
                    return self.read_word_starting_with("#", start);
                }
                if preserve_comments {
                    self.read_comment();
                    Some(LexedToken::comment())
                } else {
                    self.skip_comment();
                    self.next_lexed_token_inner(false)
                }
            }
            // A leading digit may be an fd-redirect prefix rather than a
            // plain word.
            '0'..='9' => self.read_word_or_fd_redirect(),
            _ => self.read_word(),
        }
    }
1300
    /// Skips horizontal whitespace (spaces/tabs) and `\<newline>` line
    /// continuations without emitting a token.
    ///
    /// While no reinjected characters are pending, bulk fast paths consume
    /// whole whitespace runs and continuation pairs straight from the source
    /// cursor; otherwise the slow per-character path below is used so the
    /// reinject buffer is honored.
    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.peek_char() {
            if self.reinject_buf.is_empty() {
                // Fast path: consume a whole run of spaces/tabs at once.
                let whitespace_len = self.source_horizontal_whitespace_len();
                if whitespace_len > 0 {
                    self.consume_source_bytes(whitespace_len);
                    continue;
                }

                // Fast path: backslash-newline is a line continuation.
                if self.cursor.rest().starts_with("\\\n") {
                    self.consume_source_bytes(2);
                    continue;
                }
            }

            // Slow path (also taken while reinjected input is pending).
            if ch == ' ' || ch == '\t' {
                self.consume_ascii_chars(1);
            } else if ch == '\\' {
                if self.second_char() == Some('\n') {
                    self.consume_ascii_chars(2);
                } else {
                    // Backslash not followed by newline starts a word.
                    break;
                }
            } else {
                break;
            }
        }
    }
1330
1331 fn skip_comment(&mut self) {
1332 if self.reinject_buf.is_empty() {
1333 let end = self
1334 .cursor
1335 .find_byte(b'\n')
1336 .unwrap_or(self.cursor.rest().len());
1337 self.consume_source_bytes(end);
1338 return;
1339 }
1340
1341 while let Some(ch) = self.peek_char() {
1342 if ch == '\n' {
1343 break;
1344 }
1345 self.advance();
1346 }
1347 }
1348
1349 fn read_comment(&mut self) {
1350 debug_assert_eq!(self.peek_char(), Some('#'));
1351
1352 if self.reinject_buf.is_empty() {
1353 let rest = self.cursor.rest();
1354 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1355 self.consume_source_bytes(end);
1356 return;
1357 }
1358
1359 self.advance(); while let Some(ch) = self.peek_char() {
1362 if ch == '\n' {
1363 break;
1364 }
1365 self.advance();
1366 }
1367 }
1368
1369 fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1370 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1371 return false;
1372 }
1373
1374 let line_start = self.input[..self.offset]
1375 .rfind('\n')
1376 .map_or(0, |index| index + 1);
1377 let prefix = &self.input[line_start..self.offset];
1378 line_has_unclosed_double_paren(prefix)
1379 }
1380
    /// Lexes a token starting with an ASCII digit: either an fd-prefixed
    /// redirection (`2>`, `2>>`, `2>|`, `2>&1`, `0<&3`, `3<>`, ...) or, when
    /// no redirection operator follows the digit, an ordinary word.
    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
            let Some(fd) = first_digit.to_digit(10) else {
                unreachable!("peeked ASCII digit should convert to a base-10 digit");
            };
            let fd = fd as i32;

            match (self.second_char(), self.third_char()) {
                // `N>>` append; `N>>|` consumes the clobber suffix too.
                (Some('>'), Some('>')) => {
                    if self.fourth_char() == Some('|') {
                        self.consume_ascii_chars(4);
                    } else {
                        self.consume_ascii_chars(3);
                    }
                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
                }
                // `N>|` clobber.
                (Some('>'), Some('|')) => {
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
                }
                // `N>&M` duplicate output fd.
                (Some('>'), Some('&')) => {
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() {
                            target_str.push(c);
                            self.advance();
                        } else {
                            break;
                        }
                    }

                    // NOTE(review): `N>&` with no digit target is emitted as a
                    // plain RedirectFd even though the `&` was consumed —
                    // presumably the parser treats the following word as the
                    // `>&word` target; confirm against the parser.
                    if target_str.is_empty() {
                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                    }

                    // `parse` only fails on i32 overflow here (digits only);
                    // falls back to target fd 1 in that case.
                    let target_fd: i32 = target_str.parse().unwrap_or(1);
                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
                }
                // `N>` plain output redirect.
                (Some('>'), _) => {
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                }
                // `N<&M` duplicate input fd; `N<&-` closes the fd.
                (Some('<'), Some('&')) => {
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() || c == '-' {
                            target_str.push(c);
                            self.advance();
                            // A `-` terminates the target immediately.
                            if c == '-' {
                                break;
                            }
                        } else {
                            break;
                        }
                    }

                    if target_str == "-" {
                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
                    }
                    // NOTE(review): a mixed target like `3-` (move-fd form)
                    // fails to parse and falls back to fd 0 — verify this is
                    // the intended handling.
                    let target_fd: i32 = target_str.parse().unwrap_or(0);
                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
                }
                // `N<>` read-write redirect.
                (Some('<'), Some('>')) => {
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
                }
                // `N<<` heredoc: fall through to the generic word path.
                (Some('<'), Some('<')) => {}
                // `N<` plain input redirect.
                (Some('<'), _) => {
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
                }
                _ => {}
            }
        }

        // Digit not followed by a redirection operator: lex it as a word.
        self.read_word()
    }
1465
1466 fn read_word_starting_with(
1467 &mut self,
1468 _prefix: &str,
1469 start: Position,
1470 ) -> Option<LexedToken<'a>> {
1471 let segment = match self.read_unquoted_segment(start) {
1472 Ok(segment) => segment,
1473 Err(kind) => return Some(LexedToken::error(kind)),
1474 };
1475 if segment.as_str().is_empty() {
1476 return None;
1477 }
1478 let mut lexed_word = LexedWord::from_segment(segment);
1479 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1480 return Some(LexedToken::error(kind));
1481 }
1482 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1483 }
1484
    /// Lexes a word starting at the current position.
    ///
    /// When lexing straight from the source (no reinjected characters), a
    /// bulk fast path consumes a run of plain word characters and, when the
    /// word clearly ends there, returns a borrowed token without touching
    /// the segment machinery. Anything more complex falls through to
    /// `read_complex_word`.
    fn read_word(&mut self) -> Option<LexedToken<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            // Bulk-consume ASCII word bytes; only valid when the byte after
            // the run is also ASCII, otherwise re-scan char-wise so UTF-8
            // boundaries are respected.
            let ascii_len = self.source_ascii_plain_word_len();
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if !chunk.is_empty() {
                // Does the word continue with something the plain scan could
                // not handle (expansion, quote, brace, or a parenthesized
                // suffix such as `name=(...)`)?
                let continues = matches!(
                    self.peek_char(),
                    Some(next)
                        if Self::is_word_char(next)
                            || next == '$'
                            || matches!(next, '\'' | '"')
                            || next == '{'
                            || (next == '('
                                && (chunk.ends_with('=')
                                    || Self::word_can_take_parenthesized_suffix(chunk)))
                );

                if !continues {
                    // The entire word was plain: borrow it from the input.
                    let end = self.current_position();
                    return Some(LexedToken::borrowed_word(
                        TokenKind::Word,
                        &self.input[start.offset..self.offset],
                        Some(Span::from_positions(start, end)),
                    ));
                }

                if self.peek_char() == Some('(')
                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
                {
                    // `name=(...)` / suffix form: restart on the complex path
                    // so the parenthesized part joins this word.
                    return self.read_complex_word(start);
                }

                // Plain prefix followed by more word material: keep the
                // borrowed prefix as the first segment and keep scanning.
                let end = self.current_position();
                return self.finish_segmented_word(LexedWord::borrowed(
                    LexedWordSegmentKind::Plain,
                    &self.input[start.offset..self.offset],
                    Some(Span::from_positions(start, end)),
                ));
            }
        }

        self.read_complex_word(start)
    }
1544
1545 fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1546 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1547 return Some(LexedToken::error(kind));
1548 }
1549
1550 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1551 }
1552
1553 fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1554 if self.peek_char() == Some('$') {
1555 match self.second_char() {
1556 Some('\'') => return self.read_dollar_single_quoted_string(),
1557 Some('"') => return self.read_dollar_double_quoted_string(),
1558 _ => {}
1559 }
1560 }
1561
1562 let segment = match self.read_unquoted_segment(start) {
1563 Ok(segment) => segment,
1564 Err(kind) => return Some(LexedToken::error(kind)),
1565 };
1566
1567 if segment.as_str().is_empty() {
1568 return None;
1569 }
1570
1571 self.finish_segmented_word(LexedWord::from_segment(segment))
1572 }
1573
    /// Scans one unquoted word segment starting at `start`, stopping at a
    /// quote boundary or the first character that cannot belong to a word.
    ///
    /// Capture strategy: while lexing straight from the source, `word` stays
    /// `None` and the segment is borrowed from `self.input` at the end; once
    /// a transformation is needed (escape, backtick), the text scanned so
    /// far is copied into `word` via `ensure_capture_from_source` and the
    /// segment becomes owned. With reinjected characters pending, the owned
    /// buffer is used from the start.
    fn read_unquoted_segment(
        &mut self,
        start: Position,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        while let Some(ch) = self.peek_char() {
            if ch == '"' || ch == '\'' {
                // Quote boundary: the caller continues with a quoted segment.
                break;
            } else if ch == '$' {
                // `$'…'` / `$"…"` end this segment unless we are still at its
                // very first character (then the dollar-quote readers own it).
                if matches!(self.second_char(), Some('\'') | Some('"'))
                    && (self.current_position().offset > start.offset
                        || word.as_ref().is_some_and(|word| !word.is_empty()))
                {
                    break;
                }

                self.advance();

                Self::push_capture_char(&mut word, ch);
                if self.peek_char() == Some('[') {
                    // `$[…]` legacy arithmetic.
                    Self::push_capture_char(&mut word, '[');
                    self.advance();
                    if !self.read_legacy_arithmetic_into(&mut word, start) {
                        return Err(LexerErrorKind::CommandSubstitution);
                    }
                } else if self.peek_char() == Some('(') {
                    if self.second_char() == Some('(') {
                        // `$((…))` arithmetic expansion.
                        if !self.read_arithmetic_expansion_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    } else {
                        // `$(…)` command substitution.
                        Self::push_capture_char(&mut word, '(');
                        self.advance();
                        if !self.read_command_subst_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    }
                } else if self.peek_char() == Some('{') {
                    // `${…}` parameter expansion.
                    Self::push_capture_char(&mut word, '{');
                    self.advance();
                    let _ = self.read_param_expansion_into(&mut word, start);
                } else {
                    // Bare `$name` or a one-character special parameter
                    // ($?, $#, $@, $*, $!, $$, $-, $0‥$9).
                    if let Some(c) = self.peek_char() {
                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
                            || c.is_ascii_digit()
                        {
                            Self::push_capture_char(&mut word, c);
                            self.advance();
                        } else {
                            while let Some(c) = self.peek_char() {
                                if c.is_ascii_alphanumeric() || c == '_' {
                                    Self::push_capture_char(&mut word, c);
                                    self.advance();
                                } else {
                                    break;
                                }
                            }
                        }
                    }
                }
            } else if ch == '{' {
                if self.looks_like_mid_word_brace_segment() {
                    // Mid-word brace expansion such as `a{b,c}d`.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                    self.consume_mid_word_brace_segment(&mut word);
                } else {
                    // Literal `{` inside the word.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else if ch == '`' {
                // Backtick substitution forces an owned capture: back-fill the
                // buffer from the source before consuming the backtick.
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut closed = false;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    if c == '`' {
                        closed = true;
                        break;
                    }
                    if c == '\\'
                        && let Some(next) = self.peek_char()
                    {
                        // Keep the escaped character verbatim.
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                    }
                }
                if !closed {
                    return Err(LexerErrorKind::BacktickSubstitution);
                }
            } else if ch == '\\' {
                // An escape also forces an owned capture.
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                self.advance();
                if let Some(next) = self.peek_char() {
                    if next == '\n' {
                        // Line continuation: drop both characters.
                        self.advance();
                    } else {
                        // NOTE(review): the NUL byte appears to mark the next
                        // character as escaped for later stages (same marker
                        // is emitted in the double-quote reader) — confirm.
                        Self::push_capture_char(&mut word, '\x00');
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                        if next == '{'
                            && self.current_word_surface_is_single_char(start, &word, '{')
                            && self.escaped_brace_sequence_looks_like_brace_expansion()
                        {
                            // `\{a,b}`-style sequence: swallow the balanced
                            // brace group into this segment.
                            let mut depth = 1;
                            while let Some(c) = self.peek_char() {
                                Self::push_capture_char(&mut word, c);
                                self.advance();
                                match c {
                                    '{' => depth += 1,
                                    '}' => {
                                        depth -= 1;
                                        if depth == 0 {
                                            break;
                                        }
                                    }
                                    _ => {}
                                }
                            }
                        }
                    }
                } else {
                    // Trailing backslash at end of input: keep it literally.
                    Self::push_capture_char(&mut word, '\\');
                }
            } else if ch == '('
                && self.current_word_surface_ends_with_char(start, &word, '=')
                && self.looks_like_assoc_assign()
            {
                // `name=(…)` assignment value: consume the balanced
                // parenthesized group, honoring quotes and escapes inside it.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '"' => {
                            // Copy a double-quoted stretch verbatim,
                            // honoring backslash escapes.
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '"' {
                                    break;
                                }
                                if qc == '\\'
                                    && let Some(esc) = self.peek_char()
                                {
                                    Self::push_capture_char(&mut word, esc);
                                    self.advance();
                                }
                            }
                        }
                        '\'' => {
                            // Copy a single-quoted stretch verbatim.
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '\'' {
                                    break;
                                }
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
            {
                // Extended glob such as `@(…)`: consume the balanced group.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if Self::is_plain_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Bulk fast path mirroring `read_word`: grab a run of
                    // plain ASCII word bytes from the source at once.
                    let ascii_len = self.source_ascii_plain_word_len();
                    let chunk = if ascii_len > 0
                        && self
                            .cursor
                            .rest()
                            .as_bytes()
                            .get(ascii_len)
                            .is_none_or(|byte| byte.is_ascii())
                    {
                        self.consume_source_bytes(ascii_len);
                        &self.input[self.offset - ascii_len..self.offset]
                    } else {
                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                        self.advance_scanned_source_bytes(chunk.len());
                        chunk
                    };
                    Self::push_capture_str(&mut word, chunk);
                } else {
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else {
                // Not a word character: the segment ends here.
                break;
            }
        }

        if let Some(word) = word {
            // Owned capture: both spans cover the consumed source region.
            let span = Some(Span::from_positions(start, self.current_position()));
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::Plain,
                word,
                span,
                span,
            ))
        } else {
            // Pure source scan: borrow the segment text directly.
            let end = self.current_position();
            Ok(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ))
        }
    }
1844
1845 fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1846 let segment = match self.read_single_quoted_segment() {
1847 Ok(segment) => segment,
1848 Err(kind) => return Some(LexedToken::error(kind)),
1849 };
1850 let mut word = LexedWord::from_segment(segment);
1851 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1852 return Some(LexedToken::error(kind));
1853 }
1854
1855 Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1856 }
1857
    /// Scans one `'…'` segment; the opening quote is the current character.
    ///
    /// Borrows the content straight from the input when possible; falls back
    /// to an owned buffer while reinjected characters are pending or when
    /// `rc_quotes_enabled` (then `''` inside the string collapses to `'`).
    /// Returns `LexerErrorKind::SingleQuote` if the closing quote is missing.
    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(1);
        let content_start = self.current_position();
        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
        let mut content_end = content_start;
        let mut content = String::with_capacity(16);
        let mut closed = false;

        if can_borrow {
            // Fast path: jump straight to the closing quote with memchr.
            let rest = self.cursor.rest();
            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
                self.consume_source_bytes(quote_index);
                content_end = self.current_position();
                self.consume_ascii_chars(1);
                closed = true;
            } else {
                // No closing quote anywhere in the remaining source.
                self.consume_source_bytes(rest.len());
            }
        }

        // Slow path; skipped immediately when the fast path already closed.
        while let Some(ch) = self.peek_char() {
            if closed {
                break;
            }
            if ch == '\'' {
                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
                    // `''` is an escaped single quote under rc_quotes.
                    if !can_borrow {
                        content.push('\'');
                    }
                    self.advance();
                    self.advance();
                    continue;
                }
                content_end = self.current_position();
                self.consume_ascii_chars(1);
                closed = true;
                break;
            }
            if !can_borrow {
                content.push(ch);
            }
            self.advance();
        }

        if !closed {
            return Err(LexerErrorKind::SingleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if can_borrow {
            Ok(LexedWordSegment::borrowed_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                content,
                content_span,
                wrapper_span,
            ))
        }
    }
1928
1929 fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1930 let segment = match self.read_dollar_single_quoted_segment() {
1931 Ok(segment) => segment,
1932 Err(kind) => return Some(LexedToken::error(kind)),
1933 };
1934 let mut word = LexedWord::from_segment(segment);
1935 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1936 return Some(LexedToken::error(kind));
1937 }
1938
1939 let kind = if word.single_segment().is_some() {
1940 TokenKind::LiteralWord
1941 } else {
1942 TokenKind::Word
1943 };
1944
1945 Some(LexedToken::with_word_payload(kind, word))
1946 }
1947
    /// Scans one `$'…'` segment, decoding ANSI-C style escapes (`\n`, `\t`,
    /// `\xHH`, `\uHHHH`, `\UHHHHHHHH`, octal, `\cX`, …) into an owned
    /// string. Returns `LexerErrorKind::SingleQuote` when the closing quote
    /// is missing.
    fn read_dollar_single_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('$'));
        debug_assert_eq!(self.second_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(2);
        let content_start = self.current_position();
        let mut out = String::with_capacity(16);

        while let Some(ch) = self.peek_char() {
            if ch == '\'' {
                // Closing quote: build the (always owned) segment.
                let content_end = self.current_position();
                self.advance();
                let wrapper_span =
                    Some(Span::from_positions(wrapper_start, self.current_position()));
                let content_span = Some(Span::from_positions(content_start, content_end));
                return Ok(LexedWordSegment::owned_with_spans(
                    LexedWordSegmentKind::DollarSingleQuoted,
                    out,
                    content_span,
                    wrapper_span,
                ));
            }

            if ch == '\\' {
                self.advance();
                if let Some(esc) = self.peek_char() {
                    self.advance();
                    match esc {
                        'n' => out.push('\n'),
                        't' => out.push('\t'),
                        'r' => out.push('\r'),
                        'a' => out.push('\x07'), // bell
                        'b' => out.push('\x08'), // backspace
                        'f' => out.push('\x0C'), // form feed
                        'v' => out.push('\x0B'), // vertical tab
                        'e' | 'E' => out.push('\x1B'), // escape
                        '\\' => out.push('\\'),
                        '\'' => out.push('\''),
                        '"' => out.push('"'),
                        '?' => out.push('?'),
                        'c' => {
                            // `\cX` control character; a trailing `\c` with
                            // no operand is kept literally.
                            if let Some(control) = self.peek_char() {
                                self.advance();
                                out.push(((control as u32 & 0x1F) as u8) as char);
                            } else {
                                out.push('\\');
                                out.push('c');
                            }
                        }
                        'x' => {
                            // `\xH`/`\xHH`: up to two hex digits; with no
                            // valid digits the escape produces nothing.
                            let mut hex = String::new();
                            for _ in 0..2 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            // NOTE(review): values >= 0x80 are pushed as
                            // Unicode code points (`val as char`), not raw
                            // bytes — confirm this matches the intended
                            // shell semantics.
                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
                                out.push(val as char);
                            }
                        }
                        'u' => {
                            // `\uHHHH`: up to four hex digits; invalid code
                            // points are silently dropped.
                            let mut hex = String::new();
                            for _ in 0..4 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            if let Ok(val) = u32::from_str_radix(&hex, 16)
                                && let Some(c) = char::from_u32(val)
                            {
                                out.push(c);
                            }
                        }
                        'U' => {
                            // `\UHHHHHHHH`: up to eight hex digits.
                            let mut hex = String::new();
                            for _ in 0..8 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            if let Ok(val) = u32::from_str_radix(&hex, 16)
                                && let Some(c) = char::from_u32(val)
                            {
                                out.push(c);
                            }
                        }
                        '0'..='7' => {
                            // Octal escape: the leading digit plus up to two
                            // more digits in the 0-7 range.
                            let mut oct = String::new();
                            oct.push(esc);
                            for _ in 0..2 {
                                if let Some(o) = self.peek_char() {
                                    if o.is_ascii_digit() && o < '8' {
                                        oct.push(o);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
                                out.push(val as char);
                            }
                        }
                        _ => {
                            // Unknown escape: keep backslash and char as-is.
                            out.push('\\');
                            out.push(esc);
                        }
                    }
                } else {
                    // Backslash at end of input: kept literally; the missing
                    // closing quote is reported below.
                    out.push('\\');
                }
                continue;
            }

            out.push(ch);
            self.advance();
        }

        // Input exhausted before the closing quote.
        Err(LexerErrorKind::SingleQuote)
    }
2086
    /// Scans a run of plain word characters as a continuation segment (e.g.
    /// after a quoted segment), or returns `None` when the next character
    /// cannot extend the word. Borrows from the source when possible;
    /// otherwise (reinjected input) collects into an owned buffer.
    fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            // Bulk fast path identical to `read_word`'s source scan: take
            // ASCII word bytes in one go if the following byte is ASCII too,
            // otherwise re-scan char-wise for UTF-8 safety.
            let ascii_len = self.source_ascii_plain_word_len();
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if chunk.is_empty() {
                return None;
            }

            let end = self.current_position();
            return Some(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ));
        }

        // Reinjected input: consume character by character.
        let ch = self.peek_char()?;
        if !Self::is_plain_word_char(ch) {
            return None;
        }

        let mut text = String::with_capacity(16);
        while let Some(ch) = self.peek_char() {
            if !Self::is_plain_word_char(ch) {
                break;
            }
            text.push(ch);
            self.advance();
        }

        Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
    }
2135
    /// Keeps appending adjoining segments (quoted strings, dollar-quoted
    /// strings, parenthesized suffixes, plain runs) to `word` until the
    /// next character can no longer extend it.
    ///
    /// The loop only terminates via the `break` in the fallback arm, which
    /// fires once `read_unquoted_segment` makes no progress; every other
    /// arm consumes at least one character per iteration.
    fn append_segmented_continuation(
        &mut self,
        word: &mut LexedWord<'a>,
    ) -> Result<(), LexerErrorKind> {
        loop {
            match self.peek_char() {
                Some('\'') => {
                    word.push_segment(self.read_single_quoted_segment()?);
                }
                Some('"') => {
                    word.push_segment(self.read_double_quoted_segment()?);
                }
                Some('$') if self.second_char() == Some('\'') => {
                    word.push_segment(self.read_dollar_single_quoted_segment()?);
                }
                Some('$') if self.second_char() == Some('"') => {
                    word.push_segment(self.read_dollar_double_quoted_segment()?);
                }
                Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
                    let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
                        unreachable!("peeked '(' should produce a suffix segment");
                    };
                    word.push_segment(segment);
                }
                _ => {
                    // Try the cheap plain scan first, then the full unquoted
                    // scanner; stop once neither consumes anything.
                    if let Some(segment) = self.read_plain_continuation_segment() {
                        word.push_segment(segment);
                        continue;
                    }

                    let start = self.current_position();
                    let plain = self.read_unquoted_segment(start)?;
                    if plain.as_str().is_empty() {
                        break;
                    }
                    word.push_segment(plain);
                }
            }
        }

        Ok(())
    }
2180
    /// Consumes a balanced `(...)` group directly following a word as a
    /// single plain segment, honoring backslash escapes and nested
    /// parentheses.
    fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
        debug_assert_eq!(self.peek_char(), Some('('));

        let start = self.current_position();
        let mut depth = 0usize;
        let mut escaped = false;
        // Collect into an owned buffer only when reinjected input is
        // pending; otherwise the text is borrowed from the source at the end.
        let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));

        while let Some(ch) = self.peek_char() {
            // Every character — including the delimiters — becomes part of
            // the segment text.
            if let Some(text) = text.as_mut() {
                text.push(ch);
            }
            self.advance();

            // A character following a backslash never affects the depth.
            if escaped {
                escaped = false;
                continue;
            }

            match ch {
                '\\' => escaped = true,
                '(' => depth += 1,
                ')' => {
                    depth = depth.saturating_sub(1);
                    if depth == 0 {
                        break;
                    }
                }
                _ => {}
            }
        }

        let end = self.current_position();
        let span = Some(Span::from_positions(start, end));
        if let Some(text) = text {
            Some(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::Plain,
                text,
                span,
                span,
            ))
        } else {
            Some(LexedWordSegment::borrowed_with_spans(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..end.offset],
                span,
                span,
            ))
        }
    }
2231
    /// Lexes a plain `"…"` string (see `read_double_quoted_word`).
    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(false)
    }
2235
    /// Lexes a `$"…"` string (see `read_double_quoted_word`).
    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(true)
    }
2239
2240 fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2241 let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2242 Ok(segment) => segment,
2243 Err(kind) => return Some(LexedToken::error(kind)),
2244 };
2245 let mut word = LexedWord::from_segment(segment);
2246 if let Err(kind) = self.append_segmented_continuation(&mut word) {
2247 return Some(LexedToken::error(kind));
2248 }
2249
2250 let kind = if word.single_segment().is_some() {
2251 TokenKind::QuotedWord
2252 } else {
2253 TokenKind::Word
2254 };
2255
2256 Some(LexedToken::with_word_payload(kind, word))
2257 }
2258
    /// Scans one plain `"…"` segment (see
    /// `read_double_quoted_segment_with_dollar`).
    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(false)
    }
2262
    /// Scans one `$"…"` segment (see
    /// `read_double_quoted_segment_with_dollar`).
    fn read_dollar_double_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(true)
    }
2268
    /// Scans one double-quoted segment; `dollar` selects the `$"…"` form.
    ///
    /// Two flags drive the scan: `simple` (no escape/expansion/backtick seen
    /// yet, enabling a bulk skip to the next special byte) and `borrowable`
    /// (the content can still be sliced verbatim out of the source). While
    /// lexing from the source, `content` starts as `None` and is only
    /// back-filled by `ensure_capture_from_source` once a transformation
    /// makes borrowing impossible.
    fn read_double_quoted_segment_with_dollar(
        &mut self,
        dollar: bool,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        if dollar {
            debug_assert_eq!(self.peek_char(), Some('$'));
            debug_assert_eq!(self.second_char(), Some('"'));
        } else {
            debug_assert_eq!(self.peek_char(), Some('"'));
        }

        let wrapper_start = self.current_position();
        if dollar {
            self.consume_ascii_chars(2);
        } else {
            self.consume_ascii_chars(1);
        }
        let content_start = self.current_position();
        let mut content_end = content_start;
        let mut simple = self.reinject_buf.is_empty();
        let mut borrowable = self.reinject_buf.is_empty();
        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        let mut closed = false;

        while let Some(ch) = self.peek_char() {
            if simple {
                if self.reinject_buf.is_empty() {
                    // Bulk-skip plain bytes up to the next special byte.
                    let rest = self.cursor.rest();
                    match Self::find_double_quote_special(rest) {
                        Some(index) if index > 0 => {
                            self.consume_source_bytes(index);
                            continue;
                        }
                        None => {
                            // No special byte left: the quote is unterminated.
                            self.consume_source_bytes(rest.len());
                            return Err(LexerErrorKind::DoubleQuote);
                        }
                        _ => {}
                    }
                }

                match ch {
                    '"' => {
                        content_end = self.current_position();
                        self.consume_ascii_chars(1);
                        closed = true;
                        break;
                    }
                    '\\' | '$' | '`' => {
                        // Leave `ch` unconsumed; the full match below
                        // re-handles it now that `simple` is off.
                        simple = false;
                        if ch == '`' {
                            borrowable = false;
                            let capture_end = self.current_position();
                            self.ensure_capture_from_source(
                                &mut content,
                                content_start,
                                capture_end,
                            );
                        }
                    }
                    _ => {
                        self.advance();
                    }
                }
                if simple {
                    continue;
                }
            }

            match ch {
                '"' => {
                    if borrowable {
                        content_end = self.current_position();
                    }
                    self.consume_ascii_chars(1);
                    closed = true;
                    break;
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        match next {
                            '\n' => {
                                // Line continuation: drop the pair entirely.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                self.advance();
                            }
                            '$' => {
                                // NOTE(review): the NUL byte appears to mark
                                // the `$` as escaped for later expansion
                                // stages (matches the unquoted scanner) —
                                // confirm.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                Self::push_capture_char(&mut content, '\x00');
                                Self::push_capture_char(&mut content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // Backslash removed; `\\` and `` \` ``
                                // additionally keep the NUL escape marker.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                if next == '\\' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                if next == '`' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                            _ => {
                                // Any other escape is kept verbatim.
                                Self::push_capture_char(&mut content, '\\');
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                        }
                    }
                }
                '$' => {
                    Self::push_capture_char(&mut content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            // `$((…))` arithmetic expansion.
                            self.read_arithmetic_expansion_into(&mut content);
                        } else {
                            // `$(…)` command substitution.
                            Self::push_capture_char(&mut content, '(');
                            self.advance();
                            self.read_command_subst_into(&mut content);
                        }
                    } else if self.peek_char() == Some('{') {
                        // `${…}` parameter expansion; it may invalidate the
                        // borrow (hence `&=`).
                        Self::push_capture_char(&mut content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
                    }
                    content_end = self.current_position();
                }
                '`' => {
                    // Backtick substitution always needs an owned capture.
                    borrowable = false;
                    let capture_end = self.current_position();
                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
                    Self::push_capture_char(&mut content, '`');
                    self.advance();
                    while let Some(c) = self.peek_char() {
                        Self::push_capture_char(&mut content, c);
                        self.advance();
                        if c == '`' {
                            break;
                        }
                        if c == '\\'
                            && let Some(next) = self.peek_char()
                        {
                            Self::push_capture_char(&mut content, next);
                            self.advance();
                        }
                    }
                    content_end = self.current_position();
                }
                _ => {
                    Self::push_capture_char(&mut content, ch);
                    self.advance();
                    content_end = self.current_position();
                }
            }
        }

        if !closed {
            return Err(LexerErrorKind::DoubleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if borrowable {
            Ok(LexedWordSegment::borrowed_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                content.unwrap_or_default(),
                content_span,
                wrapper_span,
            ))
        }
    }
2476
    /// Consumes `((…))` (the current two characters are `((`) into `content`,
    /// tracking nested parentheses and copying single-quoted, double-quoted,
    /// and backtick-quoted stretches through without letting their contents
    /// affect the depth. Returns `false` if input ends before the
    /// parentheses balance back to zero.
    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
        debug_assert_eq!(self.peek_char(), Some('('));
        debug_assert_eq!(self.second_char(), Some('('));

        Self::push_capture_char(content, '(');
        self.advance();
        Self::push_capture_char(content, '(');
        self.advance();

        // Both opening parens count toward the depth.
        let mut depth = 2;
        while let Some(c) = self.peek_char() {
            match c {
                '\\' => {
                    // Escape pair: keep both characters verbatim.
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                '\'' => {
                    // Single quotes: copy through to the closing quote.
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                '"' => {
                    // Double quotes: copy through, honoring backslash escapes.
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                '`' => {
                    // Backticks: same escape-aware copy as double quotes.
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '(' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth += 1;
                }
                ')' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth -= 1;
                    if depth == 0 {
                        // Both closing parens consumed: expansion complete.
                        return true;
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // Input exhausted before the parentheses balanced.
        false
    }
2566
    /// Captures the body of a legacy `$[ ... ]` arithmetic expansion into
    /// `content`, starting just after the opening `[` (already consumed by the
    /// caller).
    ///
    /// Quoted runs are copied opaquely, and nested expansions (`$((...))`,
    /// `$(...)`, `${...}`, `$[...]`) are delegated to their dedicated readers
    /// so their brackets do not confuse the depth count.
    ///
    /// Returns `true` once the matching `]` is consumed, `false` on EOF or if
    /// a nested reader fails.
    fn read_legacy_arithmetic_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        // One `[` is already open when we are called.
        let mut bracket_depth = 1;

        while let Some(c) = self.peek_char() {
            match c {
                // Backslash: copy it and the escaped character verbatim.
                '\\' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                // Single-quoted run: copied opaquely (no escapes inside).
                '\'' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                // Double-quoted run: copy until an unescaped closing quote.
                '"' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                // Backquoted run: copy until an unescaped closing backquote.
                '`' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '[' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth += 1;
                }
                ']' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth -= 1;
                    if bracket_depth == 0 {
                        return true;
                    }
                }
                // `$` may start a nested expansion; hand off to the matching
                // reader so its delimiters are consumed as a unit.
                '$' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                return false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            if !self.read_command_subst_into(content) {
                                return false;
                            }
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        if !self.read_param_expansion_into(content, segment_start) {
                            return false;
                        }
                    } else if self.peek_char() == Some('[') {
                        Self::push_capture_char(content, '[');
                        self.advance();
                        // Nested legacy arithmetic recurses on itself.
                        if !self.read_legacy_arithmetic_into(content, segment_start) {
                            return false;
                        }
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the closing `]`.
        false
    }
2682
    /// Captures a `$( ... )` command-substitution body into `content`,
    /// starting just after the opening `(`.
    ///
    /// Thin entry point: delegates to the depth-tracked worker with a nesting
    /// depth of 0 (the depth guards against pathological recursion via
    /// `max_subst_depth`).
    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
        self.read_command_subst_into_depth(content, 0)
    }
2689
2690 fn flush_command_subst_keyword(
2691 current_word: &mut String,
2692 pending_case_headers: &mut usize,
2693 case_clause_depths: &mut SmallVec<[usize; 4]>,
2694 depth: usize,
2695 word_started_at_command_start: &mut bool,
2696 ) {
2697 if current_word.is_empty() {
2698 *word_started_at_command_start = false;
2699 return;
2700 }
2701
2702 match current_word.as_str() {
2703 "case" if *word_started_at_command_start => *pending_case_headers += 1,
2704 "in" if *pending_case_headers > 0 => {
2705 *pending_case_headers -= 1;
2706 case_clause_depths.push(depth);
2707 }
2708 "esac" if *word_started_at_command_start => {
2709 case_clause_depths.pop();
2710 }
2711 _ => {}
2712 }
2713
2714 current_word.clear();
2715 *word_started_at_command_start = false;
2716 }
2717
    /// Reads a heredoc delimiter word inside a command substitution.
    ///
    /// The raw characters (including surrounding blanks and any quoting) are
    /// appended to `content`; the returned string is the "cooked" delimiter
    /// with quotes and escapes removed, which is what heredoc body lines are
    /// later compared against.
    ///
    /// Returns `None` when no delimiter characters were present at all.
    fn read_command_subst_heredoc_delimiter_into(
        &mut self,
        content: &mut Option<String>,
    ) -> Option<String> {
        // Skip (but still capture) blanks between `<<`/`<<-` and the word.
        while let Some(ch) = self.peek_char() {
            if !matches!(ch, ' ' | '\t') {
                break;
            }
            Self::push_capture_char(content, ch);
            self.advance();
        }

        // `cooked` accumulates the delimiter with quoting stripped.
        let mut cooked = String::new();
        let mut in_single = false;
        let mut in_double = false;
        let mut escaped = false;
        let mut saw_any = false;

        while let Some(ch) = self.peek_char() {
            // Stop at whatever terminates a delimiter in the current quoting
            // state (free helper defined elsewhere in this file).
            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
                break;
            }

            saw_any = true;
            Self::push_capture_char(content, ch);
            self.advance();

            // An escaped character goes into the cooked form literally.
            if escaped {
                cooked.push(ch);
                escaped = false;
                continue;
            }

            match ch {
                // Quote/escape characters toggle state and are dropped from
                // the cooked delimiter.
                '\\' if !in_single => escaped = true,
                '\'' if !in_double => in_single = !in_single,
                '"' if !in_single => in_double = !in_double,
                _ => cooked.push(ch),
            }
        }

        saw_any.then_some(cooked)
    }
2761
2762 fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2763 Self::push_capture_char(content, '`');
2764 self.advance();
2765 while let Some(ch) = self.peek_char() {
2766 Self::push_capture_char(content, ch);
2767 self.advance();
2768 if ch == '\\' {
2769 if let Some(esc) = self.peek_char() {
2770 Self::push_capture_char(content, esc);
2771 self.advance();
2772 }
2773 continue;
2774 }
2775 if ch == '`' {
2776 break;
2777 }
2778 }
2779 }
2780
    /// Consumes the body of one pending heredoc inside a command substitution,
    /// capturing every line (newlines included) into `content`.
    ///
    /// Stops after the line matching `delimiter` (tab-stripped comparison when
    /// `strip_tabs` is set, i.e. the `<<-` form) or at EOF. Always returns
    /// `true`: a heredoc truncated by EOF is accepted with whatever was read.
    fn read_command_subst_pending_heredoc_into(
        &mut self,
        content: &mut Option<String>,
        delimiter: &str,
        strip_tabs: bool,
    ) -> bool {
        loop {
            let mut line = String::new();
            let mut saw_newline = false;

            // Collect one line; the terminating newline is consumed but kept
            // out of `line` so the delimiter comparison sees the bare text.
            while let Some(ch) = self.peek_char() {
                self.advance();
                if ch == '\n' {
                    saw_newline = true;
                    break;
                }
                line.push(ch);
            }

            Self::push_capture_str(content, &line);
            if saw_newline {
                Self::push_capture_char(content, '\n');
            }

            // Done on the delimiter line, or on the final (unterminated) line
            // at EOF. `heredoc_line_matches_delimiter` is a free helper
            // defined elsewhere in this file.
            if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
                return true;
            }
        }
    }
2810
    /// Worker for [`Self::read_command_subst_into`]: captures the body of a
    /// `$( ... )` command substitution into `content`, starting just after the
    /// opening `(`, until the matching top-level `)`.
    ///
    /// To find the *matching* `)` it runs a lightweight shell tokenizer:
    /// it tracks paren depth, skips quoted/backquoted/escaped text, consumes
    /// comments to end of line, swallows heredoc bodies announced by `<<`,
    /// and tracks `case`/`in`/`esac` so that a `)` closing a case pattern is
    /// not mistaken for the substitution's closer.
    ///
    /// `subst_depth` counts how deeply substitutions are nested; past
    /// `max_subst_depth` a simplified paren-matching fallback is used instead.
    /// Returns `true` when the closing `)` was consumed, `false` on EOF.
    fn read_command_subst_into_depth(
        &mut self,
        content: &mut Option<String>,
        subst_depth: usize,
    ) -> bool {
        if subst_depth >= self.max_subst_depth {
            // Recursion guard: match parens only, ignoring quoting.
            // NOTE(review): this fallback advances past the body without
            // capturing it — only the final `)` is pushed. Presumably an
            // intentional truncation for pathological nesting; confirm.
            let mut depth = 1;
            while let Some(c) = self.peek_char() {
                self.advance();
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            Self::push_capture_char(content, ')');
                            return true;
                        }
                    }
                    _ => {}
                }
            }
            return false;
        }

        // Paren nesting inside this substitution; the closer is the `)` that
        // takes this back to 0.
        let mut depth = 1;
        // Heredocs announced on the current line: (cooked delimiter, `<<-`?).
        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
        // `case` headers whose `in` has not been seen yet.
        let mut pending_case_headers = 0usize;
        // Paren depths at which an open `case … in` clause lives; a `)` at the
        // top such depth ends a pattern rather than a paren group.
        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
        // Bare-word accumulator used only for keyword recognition.
        let mut current_word = String::with_capacity(16);
        // True when the next word would be in command position.
        let mut at_command_start = true;
        // True right after a redirection operator, awaiting its target word.
        let mut expecting_redirection_target = false;
        // Whether `current_word` began in command position.
        let mut current_word_started_at_command_start = false;
        while let Some(c) = self.peek_char() {
            match c {
                // Comment: capture through end of line, then release any
                // heredocs that were announced earlier on this line.
                '#' if !self.should_treat_hash_as_word_char() => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '#');
                    self.advance();
                    while let Some(comment_ch) = self.peek_char() {
                        Self::push_capture_char(content, comment_ch);
                        self.advance();
                        if comment_ch == '\n' {
                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                                if !self.read_command_subst_pending_heredoc_into(
                                    content, &delimiter, strip_tabs,
                                ) {
                                    return false;
                                }
                            }
                            at_command_start = true;
                            expecting_redirection_target = false;
                            break;
                        }
                    }
                }
                // Nested paren group (subshell, case-pattern group, …).
                '(' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    depth += 1;
                    Self::push_capture_char(content, c);
                    self.advance();
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                ')' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    // Inside an open `case … in` at this depth, `)` ends a
                    // pattern list, not a paren group.
                    if case_clause_depths
                        .last()
                        .is_some_and(|case_depth| *case_depth == depth)
                    {
                        Self::push_capture_char(content, ')');
                        self.advance();
                        at_command_start = true;
                        expecting_redirection_target = false;
                        continue;
                    }
                    depth -= 1;
                    self.advance();
                    if depth == 0 {
                        // This is the substitution's own closer.
                        Self::push_capture_char(content, ')');
                        return true;
                    }
                    Self::push_capture_char(content, c);
                    at_command_start = false;
                    expecting_redirection_target = false;
                }
                // Double-quoted string: copied opaquely except that nested
                // `$(...)` / `$((...))` inside it are still traversed so their
                // parens cannot unbalance us.
                '"' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '"');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        match qc {
                            '"' => {
                                Self::push_capture_char(content, '"');
                                self.advance();
                                break;
                            }
                            '\\' => {
                                Self::push_capture_char(content, '\\');
                                self.advance();
                                if let Some(esc) = self.peek_char() {
                                    Self::push_capture_char(content, esc);
                                    self.advance();
                                }
                            }
                            '$' => {
                                Self::push_capture_char(content, '$');
                                self.advance();
                                if self.peek_char() == Some('(') {
                                    if self.second_char() == Some('(') {
                                        if !self.read_arithmetic_expansion_into(content) {
                                            return false;
                                        }
                                    } else {
                                        Self::push_capture_char(content, '(');
                                        self.advance();
                                        if !self
                                            .read_command_subst_into_depth(content, subst_depth + 1)
                                        {
                                            return false;
                                        }
                                    }
                                }
                            }
                            _ => {
                                Self::push_capture_char(content, qc);
                                self.advance();
                            }
                        }
                    }
                    // A quoted word can satisfy a pending redirection target.
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Single-quoted string: fully opaque.
                '\'' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backquoted command substitution: copied opaquely.
                '`' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    self.read_command_subst_backtick_segment_into(content);
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // ANSI-C quoting `$'...'`: copied opaquely with escapes.
                '$' if self.second_char() == Some('\'') => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '$');
                    self.advance();
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\\' {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            continue;
                        }
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backslash escape at the command level.
                '\\' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\\');
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        Self::push_capture_char(content, esc);
                        self.advance();
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `<<` heredoc (or `<<<` here-string) operator.
                '<' if self.second_char() == Some('<') => {
                    // A purely numeric word right before the operator is an fd
                    // prefix (`2<<EOF`), which keeps us in command position.
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }

                    Self::push_capture_char(content, '<');
                    self.advance();
                    Self::push_capture_char(content, '<');
                    self.advance();

                    // `<<<` is a here-string: its target is an ordinary word.
                    if self.peek_char() == Some('<') {
                        Self::push_capture_char(content, '<');
                        self.advance();
                        expecting_redirection_target = true;
                        continue;
                    }

                    // `<<-` strips leading tabs from heredoc body lines.
                    let strip_tabs = if self.peek_char() == Some('-') {
                        Self::push_capture_char(content, '-');
                        self.advance();
                        true
                    } else {
                        false
                    };

                    // With a delimiter in hand, the body is consumed after the
                    // next newline; without one, fall back to treating the
                    // operator like a plain redirection.
                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
                    {
                        pending_heredocs.push((delimiter, strip_tabs));
                        expecting_redirection_target = false;
                    } else {
                        expecting_redirection_target = true;
                    }
                }
                // Plain redirection operators: next word is their target.
                '>' | '<' => {
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                    expecting_redirection_target = true;
                }
                // End of line: release pending heredoc bodies, reset to
                // command position.
                '\n' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    Self::push_capture_char(content, '\n');
                    self.advance();
                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                        if !self.read_command_subst_pending_heredoc_into(
                            content, &delimiter, strip_tabs,
                        ) {
                            return false;
                        }
                    }
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                _ => {
                    if c.is_ascii_alphanumeric() || c == '_' {
                        // Start of a bare word; remember whether it began in
                        // command position (keyword recognition needs this).
                        if current_word.is_empty()
                            && !expecting_redirection_target
                            && at_command_start
                        {
                            current_word_started_at_command_start = true;
                            at_command_start = false;
                        }
                        current_word.push(c);
                    } else {
                        let had_word = !current_word.is_empty();
                        Self::flush_command_subst_keyword(
                            &mut current_word,
                            &mut pending_case_headers,
                            &mut case_clause_depths,
                            depth,
                            &mut current_word_started_at_command_start,
                        );
                        if had_word && expecting_redirection_target {
                            expecting_redirection_target = false;
                        }
                        match c {
                            // Blanks separate words without changing position.
                            ' ' | '\t' => {}
                            // Command separators restore command position.
                            ';' | '|' | '&' => {
                                at_command_start = true;
                                expecting_redirection_target = false;
                            }
                            _ => {
                                if !expecting_redirection_target {
                                    at_command_start = false;
                                }
                            }
                        }
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the substitution's closing `)`.
        false
    }
3211
    /// Captures a `${ ... }` parameter-expansion body into `content`, starting
    /// just after the opening `{`.
    ///
    /// Tracks single/double quoting, nested `${…}` depth, and braces that are
    /// literal text rather than expansion delimiters. Some escape sequences
    /// are rewritten (not copied verbatim); whenever that happens the segment
    /// can no longer be borrowed from the source, so the raw prefix is first
    /// materialized via `ensure_capture_from_source` and `borrowable` drops to
    /// `false`.
    ///
    /// Returns whether the captured text is still byte-identical to the
    /// source slice (i.e. borrowable).
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut borrowable = true;
        // Nesting of `${` openers; the brace closing depth 1 ends this call.
        let mut depth = 1;
        // Braces that opened as literal `{` text, not as `${`.
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Depth at which the current double quote opened; `}` at or below it
        // belongs to the quoted text.
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            // Single-quoted text is handled up front: everything is literal
            // except the closing quote and a rewritten `\"` pair.
            if in_single {
                match c {
                    '\\' => {
                        let escape_start = self.current_position();
                        if self.second_char() == Some('"') {
                            // `\"` collapses to `"` — a rewrite, so the text
                            // stops being borrowable.
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                // A closing brace counts only outside quotes, or inside double
                // quotes when it closes a `${` opened after the quote began.
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    // At top level, prefer pairing this `}` with an earlier
                    // literal `{` if a real closer still lies ahead.
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                // A bare `{` (not `${`) is literal text.
                '{' if !in_single && !in_double => {
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    // Remember where the quote opened so `}` matching above
                    // can tell quoted braces from expansion braces.
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    // Single quotes are literal inside double quotes.
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                // Escaped `$` is rewritten; NUL appears to be
                                // an internal escaped-`$` marker for the
                                // consumer — NOTE(review): confirm downstream.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // These collapse to the escaped character.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // Escaped `}` kept verbatim; it pairs off one
                                // outstanding literal `{`.
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at EOF.
                        Self::push_capture_char(content, '\\');
                    }
                }
                // Nested expansions are traversed so their braces/parens stay
                // balanced.
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            // NOTE(review): success/failure is ignored here —
                            // presumably an unterminated substitution is
                            // surfaced elsewhere; confirm.
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3362
    /// Looks ahead (without consuming input) for a `}` that would close a
    /// parameter expansion currently at `target_depth`.
    ///
    /// Used by [`Self::read_param_expansion_into`] to decide whether a `}` it
    /// just saw should pair with a literal `{` (a real closer still lies
    /// ahead) or end the expansion. Quoting and nested `${` are tracked with
    /// the same rules as the consuming scanner; an unquoted newline at the
    /// target depth ends the search.
    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
        let mut chars = self.lookahead_chars().peekable();
        let mut depth = target_depth;
        let mut in_single = false;
        let mut in_double = false;
        let mut double_quote_depth = 0usize;

        while let Some(ch) = chars.next() {
            // Single quotes: only the closing quote matters, except that a
            // `\"` pair is skipped as a unit (mirroring the consuming scanner).
            if in_single {
                match ch {
                    '\'' => in_single = false,
                    '\\' if chars.peek() == Some(&'"') => {
                        chars.next();
                    }
                    '\\' => {}
                    _ => {}
                }
                continue;
            }

            // Double quotes: escapes skip a char; `${` still nests; `}` only
            // counts for expansions opened after the quote began.
            if in_double {
                match ch {
                    '"' => {
                        in_double = false;
                        double_quote_depth = 0;
                    }
                    '\\' => {
                        chars.next();
                    }
                    '$' if chars.peek() == Some(&'{') => {
                        chars.next();
                        depth += 1;
                    }
                    '}' if depth > double_quote_depth => {
                        depth -= 1;
                    }
                    _ => {}
                }
                continue;
            }

            match ch {
                // Unquoted newline at the target depth: no closer on this line.
                '\n' if depth == target_depth => return false,
                '\'' => in_single = true,
                '"' => {
                    in_double = true;
                    double_quote_depth = depth;
                }
                '\\' => {
                    chars.next();
                }
                '$' if chars.peek() == Some(&'{') => {
                    chars.next();
                    depth += 1;
                }
                '}' => {
                    if depth == target_depth {
                        return true;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        false
    }
3430
    /// Lookahead test (non-consuming): does the `{` at the cursor begin a
    /// brace expansion like `{a,b}` or `{1..9}`?
    ///
    /// Requires a matching `}` with a top-level `,` or `..` before it, scanned
    /// with quote/escape awareness; unquoted whitespace or `;` at the top
    /// level disqualifies it. Bounded by `MAX_LOOKAHEAD` characters.
    fn looks_like_brace_expansion(&self) -> bool {
        // Cap the scan so pathological input cannot make lookahead quadratic.
        const MAX_LOOKAHEAD: usize = 10_000;

        let mut chars = self.lookahead_chars();

        if chars.next() != Some('{') {
            return false;
        }

        let mut depth = 1;
        let mut paren_depth = 0usize;
        let mut has_comma = false;
        let mut has_dot_dot = false;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;
        let mut scanned = 0usize;

        for ch in chars {
            scanned += 1;
            if scanned > MAX_LOOKAHEAD {
                return false;
            }

            // Braces/parens only count when not inside any quoting construct.
            let brace_surface_active = !in_single && !in_double && !in_backtick;
            // `,`/`..` qualify the expansion only at depth 1 outside parens.
            let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;

            match ch {
                // A character following a backslash is always literal.
                _ if escaped => {
                    escaped = false;
                }
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // Parens nest only for `$( ...` or inside an open paren group.
                '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
                    paren_depth += 1
                }
                ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
                '{' if !in_single && !in_double && !in_backtick => depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    depth -= 1;
                    if depth == 0 {
                        return has_comma || has_dot_dot;
                    }
                }
                ',' if at_top_level => has_comma = true,
                '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
                // Word break before the closer: not a brace expansion.
                ' ' | '\t' | '\n' | ';' if at_top_level => return false,
                _ => {}
            }
            prev_char = Some(ch);
        }

        false
    }
3497
    /// Consumes a `{ ... }` segment appearing mid-word, appending every
    /// character (delimiters included) to the optional capture buffer `word`.
    ///
    /// Mirrors [`Self::consume_brace_word_body`] but writes through
    /// `push_capture_char` into an `Option<String>`. The opening `{` has
    /// already been consumed by the caller; scanning stops after the matching
    /// unquoted `}` or at EOF.
    fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
        // One `{` is already open.
        let mut brace_depth = 1usize;
        let mut paren_depth = 0usize;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;

        while let Some(ch) = self.peek_char() {
            // Every character is captured, including the closing `}`.
            Self::push_capture_char(word, ch);
            self.advance();

            // A character following a backslash is literal.
            if escaped {
                escaped = false;
                prev_char = Some(ch);
                continue;
            }

            match ch {
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // Parens nest only for `$(` or inside an open paren group.
                '(' if !in_single
                    && !in_double
                    && !in_backtick
                    && (paren_depth > 0 || prev_char == Some('$')) =>
                {
                    paren_depth += 1
                }
                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
                    paren_depth -= 1
                }
                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    brace_depth -= 1;
                    if brace_depth == 0 {
                        break;
                    }
                }
                _ => {}
            }

            prev_char = Some(ch);
        }
    }
3545
    /// Consumes the body of a brace segment into `word`, starting just after
    /// an already-consumed `{`, through the matching unquoted `}` (captured)
    /// or EOF.
    ///
    /// Same scanning rules as [`Self::consume_mid_word_brace_segment`], but
    /// appends directly to a `String`.
    fn consume_brace_word_body(&mut self, word: &mut String) {
        // One `{` is already open.
        let mut brace_depth = 1usize;
        let mut paren_depth = 0usize;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;

        while let Some(ch) = self.peek_char() {
            // Every character is captured, including the closing `}`.
            word.push(ch);
            self.advance();

            // A character following a backslash is literal.
            if escaped {
                escaped = false;
                prev_char = Some(ch);
                continue;
            }

            match ch {
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // Parens nest only for `$(` or inside an open paren group.
                '(' if !in_single
                    && !in_double
                    && !in_backtick
                    && (paren_depth > 0 || prev_char == Some('$')) =>
                {
                    paren_depth += 1
                }
                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
                    paren_depth -= 1
                }
                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    brace_depth -= 1;
                    if brace_depth == 0 {
                        break;
                    }
                }
                _ => {}
            }

            prev_char = Some(ch);
        }
    }
3593
    /// Lookahead test (non-consuming): does the `{` at the cursor close as a
    /// single brace segment before any unquoted word-breaking character?
    ///
    /// Unlike [`Self::looks_like_brace_expansion`] this does not require a
    /// `,` or `..` — it only checks that the `{...}` run is uninterrupted by
    /// whitespace, operators, or redirections at its top level. Bounded by
    /// `MAX_LOOKAHEAD` characters.
    fn looks_like_mid_word_brace_segment(&self) -> bool {
        // Cap the scan so pathological input cannot make lookahead quadratic.
        const MAX_LOOKAHEAD: usize = 10_000;

        let mut chars = self.lookahead_chars();
        if chars.next() != Some('{') {
            return false;
        }

        let mut brace_depth = 1;
        let mut paren_depth = 0usize;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;
        let mut scanned = 0usize;

        for ch in chars {
            scanned += 1;
            if scanned > MAX_LOOKAHEAD {
                return false;
            }

            // An unquoted word breaker at the top level disqualifies the
            // segment before its `}` was found.
            if !in_single
                && !in_double
                && !in_backtick
                && !escaped
                && brace_depth == 1
                && paren_depth == 0
                && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
            {
                return false;
            }

            // A character following a backslash is literal.
            if escaped {
                escaped = false;
                prev_char = Some(ch);
                continue;
            }

            match ch {
                '\\' => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // Parens nest only for `$(` or inside an open paren group.
                '(' if !in_single
                    && !in_double
                    && !in_backtick
                    && (paren_depth > 0 || prev_char == Some('$')) =>
                {
                    paren_depth += 1
                }
                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
                    paren_depth -= 1
                }
                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    brace_depth -= 1;
                    if brace_depth == 0 {
                        return true;
                    }
                }
                _ => {}
            }

            prev_char = Some(ch);
        }

        false
    }
3666
3667 fn is_brace_group_start(&self) -> bool {
3669 let mut chars = self.lookahead_chars();
3670 if chars.next() != Some('{') {
3672 return false;
3673 }
3674 matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3676 }
3677
3678 fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3681 const MAX_LOOKAHEAD: usize = 10_000;
3682
3683 let mut chars = self.lookahead_chars();
3684 let mut depth = 1;
3685 let mut has_comma = false;
3686 let mut has_dot_dot = false;
3687 let mut prev_char = None;
3688 let mut scanned = 0usize;
3689
3690 for ch in chars.by_ref() {
3691 scanned += 1;
3692 if scanned > MAX_LOOKAHEAD {
3693 return false;
3694 }
3695 match ch {
3696 '{' => depth += 1,
3697 '}' => {
3698 depth -= 1;
3699 if depth == 0 {
3700 return has_comma || has_dot_dot;
3701 }
3702 }
3703 ',' if depth == 1 => has_comma = true,
3704 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3705 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3706 _ => {}
3707 }
3708 prev_char = Some(ch);
3709 }
3710
3711 false
3712 }
3713
3714 fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3715 let mut chars = self.lookahead_chars();
3716 if chars.next() != Some('{') {
3717 return false;
3718 }
3719 chars.next() == Some(')')
3720 }
3721
    /// Reads a word beginning with a literal (non-expansion) `{`, returning it
    /// as an owned `Word` token, or `None` if the cursor is not on `{`.
    ///
    /// Consumes the brace segment body, then any trailing word characters
    /// glued onto it.
    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
        let mut word = String::with_capacity(16);

        if let Some('{') = self.peek_char() {
            word.push('{');
            self.advance();
        } else {
            return None;
        }

        self.consume_brace_word_body(&mut word);

        // Absorb trailing word characters attached to the segment.
        while let Some(ch) = self.peek_char() {
            if Self::is_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Fast path: nothing re-injected, so a contiguous chunk
                    // can be taken straight from the cursor.
                    let chunk = self.cursor.eat_while(Self::is_word_char);
                    word.push_str(chunk);
                    self.advance_scanned_source_bytes(chunk.len());
                } else {
                    // Re-injected characters must go through `advance` one at
                    // a time so the buffer drains correctly.
                    word.push(ch);
                    self.advance();
                }
            } else {
                break;
            }
        }

        Some(LexedToken::owned_word(TokenKind::Word, word))
    }
3752
3753 fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3755 let mut word = String::with_capacity(16);
3756
3757 if let Some('{') = self.peek_char() {
3759 word.push('{');
3760 self.advance();
3761 } else {
3762 return None;
3763 }
3764
3765 self.consume_brace_word_body(&mut word);
3767
3768 while let Some(ch) = self.peek_char() {
3770 if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3771 if ch == '{' {
3772 word.push(ch);
3774 self.advance();
3775 self.consume_brace_word_body(&mut word);
3776 } else {
3777 word.push(ch);
3778 self.advance();
3779 }
3780 } else {
3781 break;
3782 }
3783 }
3784
3785 Some(LexedToken::owned_word(TokenKind::Word, word))
3786 }
3787
3788 fn looks_like_assoc_assign(&self) -> bool {
3792 let mut chars = self.lookahead_chars();
3793 if chars.next() != Some('(') {
3795 return false;
3796 }
3797 for ch in chars {
3799 match ch {
3800 ' ' | '\t' => continue,
3801 '[' => return true,
3802 _ => return false,
3803 }
3804 }
3805 false
3806 }
3807
3808 fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3809 text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3810 }
3811
3812 fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3813 word.segments().any(|segment| {
3814 matches!(
3815 segment.kind(),
3816 LexedWordSegmentKind::SingleQuoted
3817 | LexedWordSegmentKind::DollarSingleQuoted
3818 | LexedWordSegmentKind::DoubleQuoted
3819 | LexedWordSegmentKind::DollarDoubleQuoted
3820 )
3821 }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3822 }
3823
3824 fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3825 text.contains(['*', '?'])
3826 || text.ends_with('}') && text.contains("${")
3827 || text.ends_with(']')
3828 && text
3829 .rfind('[')
3830 .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3831 }
3832
3833 fn is_word_char(ch: char) -> bool {
3834 !matches!(
3835 ch,
3836 ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3837 )
3838 }
3839
3840 const fn is_ascii_word_byte(byte: u8) -> bool {
3841 !matches!(
3842 byte,
3843 b' ' | b'\t'
3844 | b'\n'
3845 | b';'
3846 | b'|'
3847 | b'&'
3848 | b'>'
3849 | b'<'
3850 | b'('
3851 | b')'
3852 | b'{'
3853 | b'}'
3854 | b'\''
3855 | b'"'
3856 )
3857 }
3858
3859 const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3860 Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3861 }
3862
3863 fn is_plain_word_char(ch: char) -> bool {
3864 Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3865 }
3866
3867 pub fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3869 let mut content = String::with_capacity(64);
3870 let mut current_line = String::with_capacity(64);
3871
3872 let mut rest_of_line = String::with_capacity(32);
3879 let rest_of_line_start = self.current_position();
3880 let mut in_double_quote = false;
3881 let mut in_single_quote = false;
3882 let mut in_comment = false;
3883 let mut saw_non_whitespace_tail = false;
3884 let mut consecutive_backslashes = 0usize;
3885 let mut previous_tail_char = None;
3886 while let Some(ch) = self.peek_char() {
3887 self.advance();
3888 if in_comment {
3889 if ch == '\n' {
3890 break;
3891 }
3892 rest_of_line.push(ch);
3893 previous_tail_char = Some(ch);
3894 continue;
3895 }
3896 if ch == '#'
3897 && !in_single_quote
3898 && !in_double_quote
3899 && self.comments_enabled()
3900 && heredoc_tail_hash_starts_comment(previous_tail_char)
3901 {
3902 in_comment = true;
3903 rest_of_line.push(ch);
3904 previous_tail_char = Some(ch);
3905 consecutive_backslashes = 0;
3906 continue;
3907 }
3908 let backslash_continues_line = ch == '\\'
3909 && !in_single_quote
3910 && self.peek_char() == Some('\n')
3911 && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3912 && consecutive_backslashes.is_multiple_of(2);
3913 if backslash_continues_line {
3914 rest_of_line.push(ch);
3915 rest_of_line.push('\n');
3916 self.advance();
3917 consecutive_backslashes = 0;
3918 continue;
3919 }
3920 if ch == '\n' && !in_double_quote && !in_single_quote {
3921 break;
3922 }
3923 if ch == '"' && !in_single_quote {
3924 in_double_quote = !in_double_quote;
3925 } else if ch == '\'' && !in_double_quote {
3926 in_single_quote = !in_single_quote;
3927 } else if ch == '\\' && in_double_quote {
3928 rest_of_line.push(ch);
3930 if let Some(next) = self.peek_char() {
3931 rest_of_line.push(next);
3932 self.advance();
3933 }
3934 continue;
3935 }
3936 rest_of_line.push(ch);
3937 if !ch.is_whitespace() {
3938 saw_non_whitespace_tail = true;
3939 }
3940 if ch == '\\' && !in_single_quote {
3941 consecutive_backslashes += 1;
3942 } else {
3943 consecutive_backslashes = 0;
3944 }
3945 previous_tail_char = Some(ch);
3946 }
3947
3948 self.sync_offset_to_cursor();
3952 let content_start = self.current_position();
3953 let mut current_line_start = content_start;
3954 let content_end;
3955
3956 loop {
3958 if self.reinject_buf.is_empty() {
3959 self.sync_offset_to_cursor();
3965 let rest = self.cursor.rest();
3966 if rest.is_empty() {
3967 content_end = self.current_position();
3968 break;
3969 }
3970
3971 let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3972 let line = &rest[..line_len];
3973 let has_newline = line_len < rest.len();
3974
3975 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3976 content_end = current_line_start;
3977 self.consume_source_bytes(line_len);
3978 if has_newline {
3979 self.consume_ascii_chars(1);
3980 }
3981 break;
3982 }
3983
3984 content.push_str(line);
3985 self.consume_source_bytes(line_len);
3986
3987 if has_newline {
3988 self.consume_ascii_chars(1);
3989 content.push('\n');
3990 current_line_start = self.current_position();
3991 continue;
3992 }
3993
3994 content_end = self.current_position();
3995 break;
3996 }
3997
3998 match self.peek_char() {
3999 Some('\n') => {
4000 self.advance();
4001 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4003 content_end = current_line_start;
4004 break;
4005 }
4006 content.push_str(¤t_line);
4007 content.push('\n');
4008 current_line.clear();
4009 current_line_start = self.current_position();
4010 }
4011 Some(ch) => {
4012 current_line.push(ch);
4013 self.advance();
4014 }
4015 None => {
4016 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4018 content_end = current_line_start;
4019 break;
4020 }
4021 if !current_line.is_empty() {
4022 content.push_str(¤t_line);
4023 }
4024 content_end = self.current_position();
4025 break;
4026 }
4027 }
4028 }
4029
4030 let post_heredoc_offset = self.offset;
4035 self.offset = rest_of_line_start.offset;
4036 for ch in rest_of_line.chars() {
4037 self.reinject_buf.push_back(ch);
4038 }
4039 self.reinject_buf.push_back('\n');
4040 self.reinject_resume_offset = Some(post_heredoc_offset);
4041
4042 HeredocRead {
4043 content,
4044 content_span: Span::from_positions(content_start, content_end),
4045 }
4046 }
4047
4048 fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4049 let mut chars = self.cursor.rest().chars();
4050 if chars.next() != Some('\n') {
4051 return false;
4052 }
4053
4054 for ch in chars {
4055 if matches!(ch, ' ' | '\t') {
4056 continue;
4057 }
4058 if ch == '\n' {
4059 return false;
4060 }
4061 return matches!(ch, '|' | '&' | ';' | '<' | '>')
4062 || (ch == '#' && self.comments_enabled());
4063 }
4064
4065 false
4066 }
4067}
4068
/// Does `line` terminate a heredoc with `delimiter`? With `strip_tabs`
/// (`<<-`), leading tabs on the line are ignored. Trailing spaces/tabs after
/// the delimiter are always tolerated.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    // An exact match leaves an empty remainder, which trivially passes the
    // trailing-whitespace check.
    match candidate.strip_prefix(delimiter) {
        Some(trailing) => trailing.chars().all(|ch| ch == ' ' || ch == '\t'),
        None => false,
    }
}
4086
/// A `#` in the heredoc operator-line tail opens a comment only at the very
/// start of the tail, or after whitespace or one of `;|&<>)`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(prev) => prev.is_whitespace() || ";|&<>)".contains(prev),
    }
}
4092
/// Decodes the `char` starting at byte `index`, returning it together with
/// the index just past it. `None` at end of input or when `index` is not a
/// character boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    let tail = input.get(index..)?;
    let ch = tail.chars().next()?;
    Some((ch, index + ch.len_utf8()))
}
4097
/// Does `prefix` (one line of input) contain a `((` that has not yet been
/// closed by a matching `))`? Quoting (single, double, backtick) and
/// backslash escapes are honored; `(`/`)` pairs count only when doubled.
fn line_has_unclosed_double_paren(prefix: &str) -> bool {
    let mut index = 0usize;
    let mut open_pairs = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_backtick = false;
    let mut escape_pending = false;

    // Inlined char-boundary walk over `prefix`.
    let decode = |at: usize| -> Option<(char, usize)> {
        let ch = prefix.get(at..)?.chars().next()?;
        Some((ch, at + ch.len_utf8()))
    };

    while let Some((ch, after)) = decode(index) {
        let was_escaped = escape_pending;
        if ch == '\\' && !in_single {
            // Toggle so only odd backslash runs escape the next character.
            escape_pending = !escape_pending;
            index = after;
            continue;
        }
        escape_pending = false;

        let unquoted = !in_single && !in_double && !in_backtick && !was_escaped;
        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
            '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
            '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
            '(' if unquoted && prefix[after..].starts_with('(') => {
                open_pairs += 1;
                index = after + 1;
                continue;
            }
            ')' if unquoted && prefix[after..].starts_with(')') => {
                open_pairs = open_pairs.saturating_sub(1);
                index = after + 1;
                continue;
            }
            _ => {}
        }

        index = after;
    }

    open_pairs > 0
}
4147
4148fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4149 let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4150 let prefix = &input[line_start..index];
4151 line_has_unclosed_double_paren(prefix)
4152}
4153
4154fn hash_starts_comment(input: &str, index: usize) -> bool {
4155 if inside_unclosed_double_paren_on_line(input, index) {
4156 return false;
4157 }
4158
4159 let next = &input[index + '#'.len_utf8()..];
4160 input[..index]
4161 .chars()
4162 .next_back()
4163 .is_none_or(|prev| match prev {
4164 '(' => {
4165 let whitespace_index = next.find(char::is_whitespace);
4166 let close_index = next.find(')');
4167
4168 match (whitespace_index, close_index) {
4169 (Some(whitespace), Some(close)) => whitespace < close,
4170 (Some(_), None) | (None, None) => true,
4171 (None, Some(_)) => false,
4172 }
4173 }
4174 _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4175 })
4176}
4177
/// A character terminates an unquoted heredoc delimiter word when it is
/// whitespace or a shell operator character, outside quotes and not escaped.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }
    ch.is_whitespace() || "|&;<>()".contains(ch)
}
4189
4190fn scan_double_quoted_command_substitution_segment(
4191 input: &str,
4192 mut index: usize,
4193 subst_depth: usize,
4194) -> Option<usize> {
4195 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4196 match ch {
4197 '"' => return Some(next_index),
4198 '\\' => {
4199 index = next_index;
4200 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4201 index = escaped_next;
4202 }
4203 }
4204 '$' if input[next_index..].starts_with('{') => {
4205 let consumed = scan_command_subst_parameter_expansion_len(
4206 &input[next_index + '{'.len_utf8()..],
4207 subst_depth,
4208 )?;
4209 index = next_index + '{'.len_utf8() + consumed;
4210 }
4211 '$' if input[next_index..].starts_with('(')
4212 && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4213 {
4214 let consumed = scan_command_substitution_body_len_inner(
4215 &input[next_index + '('.len_utf8()..],
4216 subst_depth + 1,
4217 )?;
4218 index = next_index + '('.len_utf8() + consumed;
4219 }
4220 _ => index = next_index,
4221 }
4222 }
4223
4224 None
4225}
4226
/// Scans a `${...}` parameter expansion body (input starts just after the
/// `{`), returning the byte length consumed up to and including the closing
/// `}`. Returns `None` when the expansion never terminates or a nested scan
/// fails (e.g. the substitution-depth limit is exceeded).
fn scan_command_subst_parameter_expansion_len(input: &str, subst_depth: usize) -> Option<usize> {
    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // Set right after a bare `$`, so a following `'` opens `$'...'`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            // Toggle so only odd backslash runs escape the next character.
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // Nested `${...}` — recurse at the same substitution depth.
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // Nested `$(...)` (but not `$((`) — one substitution level deeper.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(...)` / `>(...)` process substitutions nested in the expansion.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // An unquoted, unescaped `}` closes the expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4326
4327fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4328 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4329 if !matches!(ch, ' ' | '\t') {
4330 break;
4331 }
4332 index = next_index;
4333 }
4334
4335 let start = index;
4336 let mut cooked = String::new();
4337 let mut in_single = false;
4338 let mut in_double = false;
4339 let mut escaped = false;
4340
4341 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4342 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4343 break;
4344 }
4345
4346 index = next_index;
4347 if escaped {
4348 cooked.push(ch);
4349 escaped = false;
4350 continue;
4351 }
4352
4353 match ch {
4354 '\\' if !in_single => escaped = true,
4355 '\'' if !in_double => in_single = !in_single,
4356 '"' if !in_single => in_double = !in_double,
4357 _ => cooked.push(ch),
4358 }
4359 }
4360
4361 (index > start).then_some((index, cooked))
4362}
4363
4364fn skip_command_subst_pending_heredoc(
4365 input: &str,
4366 mut index: usize,
4367 delimiter: &str,
4368 strip_tabs: bool,
4369) -> usize {
4370 while index <= input.len() {
4371 let rest = &input[index..];
4372 let line_len = rest.find('\n').unwrap_or(rest.len());
4373 let line = &rest[..line_len];
4374 let has_newline = line_len < rest.len();
4375
4376 index += line_len;
4377 if has_newline {
4378 index += '\n'.len_utf8();
4379 }
4380
4381 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4382 return index;
4383 }
4384 }
4385
4386 index
4387}
4388
4389fn scan_command_subst_ansi_c_single_quoted_segment(
4390 input: &str,
4391 quote_index: usize,
4392) -> Option<usize> {
4393 let mut index = quote_index + '\''.len_utf8();
4394
4395 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4396 index = next_index;
4397 if ch == '\\' {
4398 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4399 index = escaped_next;
4400 }
4401 continue;
4402 }
4403
4404 if ch == '\'' {
4405 return Some(index);
4406 }
4407 }
4408
4409 None
4410}
4411
4412fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4413 let mut index = start;
4414
4415 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4416 index = next_index;
4417 if ch == '\\' {
4418 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4419 index = escaped_next;
4420 }
4421 continue;
4422 }
4423
4424 if ch == '`' {
4425 return Some(index);
4426 }
4427 }
4428
4429 None
4430}
4431
4432fn flush_scanned_command_subst_keyword(
4433 current_word: &mut String,
4434 pending_case_headers: &mut usize,
4435 case_clause_depths: &mut SmallVec<[usize; 4]>,
4436 depth: usize,
4437 word_started_at_command_start: &mut bool,
4438) {
4439 if current_word.is_empty() {
4440 *word_started_at_command_start = false;
4441 return;
4442 }
4443
4444 match current_word.as_str() {
4445 "case" if *word_started_at_command_start => *pending_case_headers += 1,
4446 "in" if *pending_case_headers > 0 => {
4447 *pending_case_headers -= 1;
4448 case_clause_depths.push(depth);
4449 }
4450 "esac" if *word_started_at_command_start => {
4451 case_clause_depths.pop();
4452 }
4453 _ => {}
4454 }
4455
4456 current_word.clear();
4457 *word_started_at_command_start = false;
4458}
4459
/// Scans the body of a `$(...)` command substitution (input begins just
/// after the opening `(`), returning the byte length consumed up to and
/// including the matching `)`.
///
/// To find the right closing parenthesis the scanner tracks: quoting
/// (single, double, ANSI-C, backtick), backslash escapes, comments, pending
/// heredocs (whose bodies are skipped after the next newline), nested
/// `${...}` / `$(...)` forms, and `case ... in ... esac` clause lists —
/// whose pattern-terminating `)` must not be counted as a close paren.
/// Returns `None` for unterminated input or when `subst_depth` reaches
/// `DEFAULT_MAX_SUBST_DEPTH`.
fn scan_command_substitution_body_len_inner(input: &str, subst_depth: usize) -> Option<usize> {
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    // Parenthesis nesting; starts at 1 for the already-open `(`.
    let mut depth = 1;
    // Heredocs awaiting their body: (delimiter, strip_tabs for `<<-`).
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    // `case` keywords whose `in` has not yet been seen.
    let mut pending_case_headers = 0usize;
    // Parenthesis depths at which a `case ... in` clause list is active.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Comment: skip to end of line, then honor pending heredocs.
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            // A bare `(` opens one more nesting level.
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // Inside an active `case` clause list at this depth, `)`
                // ends a pattern rather than closing a parenthesis.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                if depth == 0 {
                    // Matching close for the substitution itself.
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            // Double-quoted segment (may itself contain nested forms).
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Single-quoted segment: no escapes, skip to the closing quote.
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Backtick command substitution segment.
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // `$'...'` ANSI-C quoted segment.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Backslash escape: skip the escaped character.
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Output redirection: the next word is its target.
            '>' => {
                // A purely numeric word in command position is an fd prefix
                // (e.g. `2>`), so the command itself is still to come.
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            // `<<` family: heredoc, or `<<<` herestring.
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // Inside `((`, `<<` is a shift operator, not a heredoc.
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<` herestring: just a redirection with a target word.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    // Body is skipped after the next newline (see `'\n'`).
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            // Newline: consume any pending heredoc bodies that start here.
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            // `${...}` parameter expansion.
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(...)` (but not `$((` arithmetic).
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Everything else: accumulate words; separators reset state.
            _ => {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        ' ' | '\t' => {}
                        // Command separators put us back in command position.
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    None
}
4813
/// Entry point: scans a `$(...)` body starting at substitution depth 0 and
/// returns the byte length up to and including the matching `)`.
pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
    scan_command_substitution_body_len_inner(input, 0)
}
4817
4818#[cfg(test)]
4819mod tests {
4820 use super::*;
4821
4822 fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4823 match token.kind {
4824 kind if kind.is_word_like() => token.word_string(),
4825 TokenKind::Comment => token
4826 .span
4827 .slice(source)
4828 .strip_prefix('#')
4829 .map(str::to_string),
4830 TokenKind::Error => token
4831 .error_kind()
4832 .map(LexerErrorKind::message)
4833 .map(str::to_string),
4834 _ => None,
4835 }
4836 }
4837
    /// Asserts that the lexer's next token has the expected kind and
    /// extracted text (see `token_text`).
    fn assert_next_token(
        lexer: &mut Lexer<'_>,
        expected_kind: TokenKind,
        expected_text: Option<&str>,
    ) {
        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, expected_kind);
        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
    }
4847
    /// Like `assert_next_token`, but pulls tokens through
    /// `next_lexed_token_with_comments`.
    fn assert_next_token_with_comments(
        lexer: &mut Lexer<'_>,
        expected_kind: TokenKind,
        expected_text: Option<&str>,
    ) {
        let token = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(token.kind, expected_kind);
        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
    }
4857
4858 fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4859 let mut lexer = Lexer::new(input);
4860
4861 while let Some(token) = lexer.next_lexed_token() {
4862 if token.kind == TokenKind::Newline {
4863 continue;
4864 }
4865
4866 assert_eq!(
4867 token.span.start.line, token.span.end.line,
4868 "token should stay on one line: {:?}",
4869 token
4870 );
4871 }
4872 }
4873
4874 #[test]
4875 fn test_simple_words() {
4876 let mut lexer = Lexer::new("echo hello world");
4877
4878 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4879 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
4880 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
4881 assert!(lexer.next_lexed_token().is_none());
4882 }
4883
4884 #[test]
4885 fn test_single_quoted_string() {
4886 let mut lexer = Lexer::new("echo 'hello world'");
4887
4888 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4889 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
4891 assert!(lexer.next_lexed_token().is_none());
4892 }
4893
4894 #[test]
4895 fn test_double_quoted_string() {
4896 let mut lexer = Lexer::new("echo \"hello world\"");
4897
4898 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4899 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
4900 assert!(lexer.next_lexed_token().is_none());
4901 }
4902
4903 #[test]
4904 fn test_brace_expansion_token_ignores_quoted_closers() {
4905 let mut lexer = Lexer::new("echo {\"}\",a}\n");
4906
4907 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4908 assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
4909 assert_next_token(&mut lexer, TokenKind::Newline, None);
4910 assert!(lexer.next_lexed_token().is_none());
4911 }
4912
4913 #[test]
4914 fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
4915 let mut lexer = Lexer::new("echo {'a\\',b} next\n");
4916
4917 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4918 assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
4919 assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
4920 assert_next_token(&mut lexer, TokenKind::Newline, None);
4921 assert!(lexer.next_lexed_token().is_none());
4922 }
4923
4924 #[test]
4925 fn test_double_quoted_expansion_token_keeps_source_backing() {
4926 let source = r#""$bar""#;
4927 let mut lexer = Lexer::new(source);
4928
4929 let token = lexer.next_lexed_token().unwrap();
4930 assert_eq!(token.kind, TokenKind::QuotedWord);
4931 assert_eq!(token.word_text(), Some("$bar"));
4932
4933 let word = token.word().unwrap();
4934 let segment = word.single_segment().unwrap();
4935 assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
4936 assert_eq!(segment.span().unwrap().slice(source), "$bar");
4937 }
4938
    #[test]
    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
        // Nested quotes inside `$( … )` must not terminate the outer double quote.
        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
        );
    }
4951
    #[test]
    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
        // `"${@}"` and the single-quoted tr class survive intact inside `$( … )`.
        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
        );
    }
4964
    #[test]
    fn test_mixed_word_keeps_segment_kinds() {
        // Adjacent plain / double-quoted / single-quoted runs stay one word,
        // with each run keeping its own segment kind and source span.
        let source = r#"foo"bar"'baz'"#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::Plain, "foo".to_string()),
                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobarbaz");
        assert_eq!(
            word.segments()
                .next()
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "foo"
        );
    }
4997
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
        // A `)` inside a `<<-` (tab-stripped) heredoc body must not end the scan early.
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }
5008
    #[test]
    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
        // A comment starting right after `;` hides its `)` from the scanner.
        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf '%s' y"));
        assert!(body.ends_with(')'));
    }
5019
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
        // A comment inside a `( … )` group keeps its `)` from closing the group.
        let source = " (# comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5030
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
        // `<<EOF|` (no space before the pipe) still starts a heredoc whose body is opaque.
        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }
5041
    #[test]
    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
        // A `)` inside `${…//…/)}` belongs to the expansion, not the substitution.
        let source = "printf %s ${x//foo/)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x//foo/)},1"));
        assert!(body.ends_with(')'));
    }
5052
    #[test]
    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
        // A comment right after a case-pattern `)` must not hide the real terminator.
        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5063
5064 #[test]
5065 fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
5066 let source = "[[ \"$buf\" == (#b)(*) ]]";
5067 let index = source.find('#').expect("expected hash");
5068
5069 assert!(!hash_starts_comment(source, index));
5070 }
5071
5072 #[test]
5073 fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
5074 let source = "(#comment with )";
5075 let index = source.find('#').expect("expected hash");
5076
5077 assert!(hash_starts_comment(source, index));
5078 }
5079
5080 #[test]
5081 fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
5082 let source = "(( #c < 256 ))";
5083 let index = source.find('#').expect("expected hash");
5084
5085 assert!(!hash_starts_comment(source, index));
5086 }
5087
5088 #[test]
5089 fn test_hash_starts_comment_respects_quoted_double_parens() {
5090 let source = "printf '((' # comment";
5091 let index = source.find('#').expect("expected hash");
5092
5093 assert!(hash_starts_comment(source, index));
5094 }
5095
    #[test]
    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
        // A quoted `((` must not push the scanner into arithmetic mode before the comment.
        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5106
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
        // `(#comment …` starts a comment inside the group even without a space after `#`.
        let source = " (#comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5117
    #[test]
    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
        // `<<` inside `(( … ))` is a shift operator, not a heredoc introducer.
        let source = "((x<<2))\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5128
    #[test]
    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
        // The `)` ending a case pattern inside a nested subshell must not close the scan.
        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with("))"));
    }
5139
    #[test]
    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
        // Literal `case`/`in` used as command arguments must not enter case-pattern mode.
        let source = "printf %s 1,2; echo case in)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("echo case in"));
        assert!(body.ends_with(')'));
    }
5150
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
        // `$'a\'b'` allows an escaped single quote; the string must not end at `\'`.
        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("$'a\\'b'"));
        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5162
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
        // A `)` inside backticks belongs to the backtick substitution, not the scan.
        let source = "printf %s `echo foo)`; printf %s ok)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("`echo foo)`"));
        assert!(body.contains("printf %s ok"));
        assert!(body.ends_with(')'));
    }
5174
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
        // Backticks nested in `${…}` may contain `}` and `)` without ending either.
        let source = "printf %s ${x/`echo }`/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/`echo }`/foo)},1"));
        assert!(body.ends_with(')'));
    }
5185
    #[test]
    fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
    {
        // `<( … )` nested in `${…}` may contain `}` without closing the expansion.
        let source = "printf %s ${x/<(echo })/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/<(echo })/foo)},1"));
        assert!(body.ends_with(')'));
    }
5197
    #[test]
    fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
        // Same as the non-EOF variant, but the `)` is the very last byte of input.
        let source = "printf %s 1,2; echo case in)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5207
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
        // ANSI-C quote handling must also work when the terminator ends the input.
        let source = "printf %s $'a\\'b'; printf %s 1,2)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5217
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
        // Backtick handling must also work when the terminator ends the input.
        let source = "printf %s `echo foo)`; printf %s ok)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5227
    #[test]
    fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
        // Mixed double/single quotes in a pipeline, terminator as the last byte.
        let source = "echo \"$line\" | cut -d' ' -f2-)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5237
    #[test]
    fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
        // `"${@}"` plus a quoted character class, terminator as the last byte.
        let source = "echo \"${@}\" | tr -d '[:space:]')";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5247
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
        // `<<-` heredoc handling must also work when the terminator ends the input.
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5257
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
        // `<<EOF|` heredoc handling must also work when the terminator ends the input.
        let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5267
    #[test]
    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
        // `$(printf ')')` nested inside `$(( … ))` inside `$( … )` — the quoted `)`
        // must not close any of the enclosing substitutions.
        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
        let mut lexer = Lexer::new(source);

        let first = lexer.next_lexed_token().expect("expected first token");
        assert!(first.kind.is_word_like(), "{:?}", first.kind);
        assert_eq!(first.word_string().as_deref(), Some("echo"));

        let second = lexer.next_lexed_token().expect("expected second token");
        assert!(second.kind.is_word_like(), "{:?}", second.kind);
        assert_eq!(
            second.word_string().as_deref(),
            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
        );
    }
5284
    #[test]
    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
        // Escaped `\"` before `$(` must not leave the scanner thinking it is quoted.
        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
    }
5293
    #[test]
    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
        // The inner `$(basename …)` is consumed whole; only the outer `)` terminates.
        let source = "echo $(echo $(basename $filename .fuzz))";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(
            &source[start..start + consumed],
            "echo $(basename $filename .fuzz))"
        );
    }
5305
    #[test]
    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
        // A quoted nested `$(basename …)` after `&&` consumes up to the final `)`.
        let source = "\n [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n )";
        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        assert_eq!(consumed, source.len());
    }
5312
    #[test]
    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
        // `'foo'bar` is one LiteralWord: a single-quoted segment plus a plain tail,
        // and the plain tail keeps its own source span.
        let source = "'foo'bar";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::LiteralWord);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
                (LexedWordSegmentKind::Plain, "bar".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobar");
        assert_eq!(
            word.segments()
                .nth(1)
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "bar"
        );
    }
5344
    #[test]
    fn test_unquoted_command_substitution_word_keeps_source_backing() {
        // A bare `$( … )` word stays a single plain segment backed by the source.
        let source = "$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }
5359
    #[test]
    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
        // A `${…}` containing a nested `${#…}` stays one plain segment with a span.
        let source = "${arr[$RANDOM % ${#arr[@]}]}";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }
5374
    #[test]
    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
        // After a quoted prefix, the `$( … )` continuation segment keeps its span.
        let source = "\"foo\"$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let continuation = word.segments().nth(1).unwrap();
        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(continuation.as_str(), "$(printf hi)");
        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
    }
5389
    #[test]
    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
        // The double-quoted `${…${#…}…}` stays one segment that slices the source.
        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
        assert_eq!(
            segment.span().unwrap().slice(source),
            "${arr[$RANDOM % ${#arr[@]}]}"
        );
    }
5407
    #[test]
    fn test_ansi_c_control_escape_can_consume_quote() {
        // In `$'\c''` the `\c` escape consumes the following quote char, producing
        // the control character BEL (0x07) rather than ending the string early.
        let mut lexer = Lexer::new("echo $'\\c''");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
        assert!(lexer.next_lexed_token().is_none());
    }
5416
    #[test]
    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
        // A `'"'` pattern inside `${…//…/…}` must not reopen double-quote mode,
        // so the assignment stays a single token ending at the line break.
        let source = r#"out_line="${out_line//'"'/'\"'}"
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5431
    #[test]
    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
        // If the replacement above leaked quote state, the echo/heredoc lines that
        // follow would be swallowed into the first token; assert they all survive.
        let source = r#"out_line="${out_line//'"'/'\"'}"
echo "Error: Missing python3!"
cat << 'EOF' > "${pywrapper}"
import os
EOF
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("Error: Missing python3!"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
    }
5461
    #[test]
    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
        // `crypt=${crypt//\\/\\\\}` stays one token: the raw span keeps the escapes
        // while the cooked word text halves each escaped backslash.
        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
        assert!(token.source_slice(source).is_none());
        assert_eq!(
            token.word_string().as_deref(),
            Some("crypt=${crypt//\\/\\\\}")
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5478
    #[test]
    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
        // A literal `{` inside a `${var#*{…}` trim pattern must not unbalance brace
        // tracking; the `fi`, `}` and trailing newline tokens must still appear.
        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n if true; then\n txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n fi\n}\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(
                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
            ),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5515
    #[test]
    fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
        // `{)` as a case pattern: the `{` is a literal word, not a brace opener,
        // so the following arms and `esac` still lex normally.
        let source = "case \"$word\" in\n {) : ;;\n :) : ;;\nesac\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5539
    #[test]
    fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
        // `^{` on the right of `=~` is a regex literal; the closing `]]`, `;` and
        // `then` must not be swallowed into it.
        let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
        assert_next_token(&mut lexer, TokenKind::And, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5561
    #[test]
    fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
        // `$( … )` inside a mid-word brace expansion must not split the word.
        let source = "echo -{$(echo a),b}-\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5572
    #[test]
    fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
        // `$(( … ))` inside a mid-word brace expansion must not split the word.
        let source = "echo -{$((1 + 2)),b}-\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5583
5584 #[test]
5585 fn test_operators() {
5586 let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5587
5588 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5589 assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5590 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5591 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5592 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5593 assert_next_token(&mut lexer, TokenKind::And, None);
5594 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5595 assert_next_token(&mut lexer, TokenKind::Or, None);
5596 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5597 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5598 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5599 assert_next_token(&mut lexer, TokenKind::Background, None);
5600 assert!(lexer.next_lexed_token().is_none());
5601 }
5602
5603 #[test]
5604 fn test_double_left_bracket_requires_separator() {
5605 let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5606
5607 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5608 assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5609 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5610 assert_next_token(&mut lexer, TokenKind::Newline, None);
5611 assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5612 assert_next_token(&mut lexer, TokenKind::Newline, None);
5613 assert!(lexer.next_lexed_token().is_none());
5614 }
5615
    #[test]
    fn test_redirects() {
        // Each redirection operator maps to its token kind; `2>|` additionally
        // carries the file descriptor on the Clobber token, with no word text.
        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");

        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Clobber);
        assert_eq!(token.fd_value(), Some(2));
        assert_eq!(token_text(&token, lexer.input), None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
        assert_next_token(&mut lexer, TokenKind::HereString, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
    }
5645
5646 #[test]
5647 fn test_comment() {
5648 let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5649
5650 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5651 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5652 assert_next_token(&mut lexer, TokenKind::Newline, None);
5653 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5654 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5655 }
5656
    #[test]
    fn test_comment_token_with_span() {
        // With-comments mode yields Comment tokens carrying the text after `#`
        // and 1-based line/column span information.
        let mut lexer = Lexer::new("# lead\necho hi # tail");

        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
        assert_eq!(comment.span.start.line, 1);
        assert_eq!(comment.span.start.column, 1);
        assert_eq!(comment.span.end.line, 1);
        assert_eq!(comment.span.end.column, 7);

        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));

        let inline = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(inline.kind, TokenKind::Comment);
        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
        assert_eq!(inline.span.start.line, 2);
        assert_eq!(inline.span.start.column, 9);
    }
5679
    #[test]
    fn test_comment_token_preserves_hash_boundaries() {
        // Only an unquoted, word-starting `#` begins a comment: mid-word hashes,
        // `${x#y}` trims, and quoted hashes all stay part of their words.
        let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
        assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
        assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
        assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
        assert!(lexer.next_lexed_token_with_comments().is_none());
    }
5692
5693 #[test]
5694 fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5695 let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5696
5697 let mut saw_comment = false;
5698 while let Some(token) = lexer.next_lexed_token_with_comments() {
5699 if token.kind == TokenKind::Comment {
5700 saw_comment = true;
5701 break;
5702 }
5703 }
5704
5705 assert!(
5706 !saw_comment,
5707 "zsh inline glob controls inside [[ ]] should not lex as comments"
5708 );
5709 }
5710
5711 #[test]
5712 fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5713 let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5714
5715 let mut saw_comment = false;
5716 while let Some(token) = lexer.next_lexed_token_with_comments() {
5717 if token.kind == TokenKind::Comment {
5718 saw_comment = true;
5719 break;
5720 }
5721 }
5722
5723 assert!(
5724 !saw_comment,
5725 "zsh arithmetic char literals inside (( )) should not lex as comments"
5726 );
5727 }
5728
    #[test]
    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
        // A zsh `${1//(#m)[…]/…}` replacement containing nested quotes, glob flags
        // and `$(( … ))` must lex as one QuotedWord.
        let mut lexer = Lexer::new(
            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5749
    #[test]
    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
        // The gnarly replacement word inside the `() { … }` body must not consume
        // the function's closing `}` or the `"$1"` argument after it.
        let mut lexer = Lexer::new(
            "() {\n builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5777
5778 #[test]
5779 fn test_variable_words() {
5780 let mut lexer = Lexer::new("echo $HOME $USER");
5781
5782 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5783 assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5784 assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5785 assert!(lexer.next_lexed_token().is_none());
5786 }
5787
5788 #[test]
5789 fn test_pipeline_tokens() {
5790 let mut lexer = Lexer::new("echo hello | cat");
5791
5792 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5793 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5794 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5795 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5796 assert!(lexer.next_lexed_token().is_none());
5797 }
5798
5799 #[test]
5800 fn test_read_heredoc() {
5801 let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5803 let content = lexer.read_heredoc("EOF", false);
5804 assert_eq!(content.content, "hello\nworld\n");
5805 }
5806
5807 #[test]
5808 fn test_read_heredoc_single_line() {
5809 let mut lexer = Lexer::new("\ntest\nEOF");
5810 let content = lexer.read_heredoc("EOF", false);
5811 assert_eq!(content.content, "test\n");
5812 }
5813
5814 #[test]
5815 fn test_read_heredoc_full_scenario() {
5816 let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5818
5819 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5821 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5822 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5823
5824 let content = lexer.read_heredoc("EOF", false);
5826 assert_eq!(content.content, "hello\nworld\n");
5827 }
5828
5829 #[test]
5830 fn test_read_heredoc_with_redirect() {
5831 let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5833 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5834 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5835 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5836 let content = lexer.read_heredoc("EOF", false);
5837 assert_eq!(content.content, "hello\n");
5838 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5840 assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5841 }
5842
5843 #[test]
5844 fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5845 let source = "cat <<EOF | grep hello \\\n | sort \\\n > out.txt\nhello\nEOF\n";
5846 let mut lexer = Lexer::new(source);
5847
5848 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5849 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5850 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5851
5852 let heredoc = lexer.read_heredoc("EOF", false);
5853 assert_eq!(heredoc.content, "hello\n");
5854
5855 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5856 assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
5857 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5858 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5859 assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
5860 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5861 assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
5862 }
5863
5864 #[test]
5865 fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
5866 let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
5867 let mut lexer = Lexer::new(source);
5868
5869 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5870 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5871 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5872
5873 let heredoc = lexer.read_heredoc("EOF", false);
5874 assert_eq!(heredoc.content, "1\n2\n3\n");
5875 }
5876
    #[test]
    fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
        // `foo\\` ends with an escaped (literal) backslash, so the newline after
        // it is real: the body starts on the next line rather than continuing
        // the command tail.
        let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "body\n");
    }
5889
5890 #[test]
5891 fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
5892 let source = "cat <<EOF # note \\\nbody\nEOF\n";
5893 let mut lexer = Lexer::new(source);
5894
5895 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5896 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5897 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5898
5899 let heredoc = lexer.read_heredoc("EOF", false);
5900 assert_eq!(heredoc.content, "body\n");
5901 }
5902
5903 #[test]
5904 fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
5905 let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
5906 let mut lexer = Lexer::new(source);
5907
5908 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5909 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5910 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5911 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5912
5913 let heredoc = lexer.read_heredoc("EOF", false);
5914 assert_eq!(heredoc.content, "body\n");
5915
5916 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5917 }
5918
5919 #[test]
5920 fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
5921 let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
5922 let mut lexer = Lexer::new(source);
5923
5924 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5925 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5926 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5927
5928 let heredoc = lexer.read_heredoc("EOF", false);
5929 assert_eq!(heredoc.content, "1\n");
5930
5931 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5932 assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
5933 }
5934
    #[test]
    fn test_read_heredoc_with_redirect_preserves_following_spans() {
        // After the body is consumed, the remaining `>`, target word, newline,
        // and comment must still carry spans that slice the original source
        // at the right offsets.
        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "hello\n");

        // Redirect operator token resumes on the original command line.
        let redirect = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(redirect.kind, TokenKind::RedirectOut);
        assert_eq!(redirect.span.slice(source), ">");

        // Target word: both cooked text and raw span must agree.
        let target = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(target.kind, TokenKind::Word);
        assert_eq!(
            token_text(&target, lexer.input).as_deref(),
            Some("file.txt")
        );
        assert_eq!(target.span.slice(source), "file.txt");

        let newline = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(newline.kind, TokenKind::Newline);
        assert_eq!(newline.span.slice(source), "\n");

        // The trailing comment's span must point past the heredoc body.
        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
        assert_eq!(comment.span.slice(source), "# done");
    }
5968
5969 #[test]
5970 fn test_comment_with_unicode() {
5971 let source = "# café résumé\necho ok";
5973 let mut lexer = Lexer::new(source);
5974
5975 let comment = lexer.next_lexed_token_with_comments().unwrap();
5976 assert_eq!(comment.kind, TokenKind::Comment);
5977 assert_eq!(
5978 token_text(&comment, lexer.input).as_deref(),
5979 Some(" café résumé")
5980 );
5981 let start = comment.span.start.offset;
5983 let end = comment.span.end.offset;
5984 assert_eq!(start, 0);
5985 assert_eq!(&source[start..end], "# café résumé");
5986 assert!(source.is_char_boundary(start));
5987 assert!(source.is_char_boundary(end));
5988
5989 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
5990 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5991 }
5992
5993 #[test]
5994 fn test_comment_with_cjk_characters() {
5995 let source = "# 你好世界\necho ok";
5997 let mut lexer = Lexer::new(source);
5998
5999 let comment = lexer.next_lexed_token_with_comments().unwrap();
6000 assert_eq!(comment.kind, TokenKind::Comment);
6001 assert_eq!(
6002 token_text(&comment, lexer.input).as_deref(),
6003 Some(" 你好世界")
6004 );
6005 let start = comment.span.start.offset;
6006 let end = comment.span.end.offset;
6007 assert_eq!(&source[start..end], "# 你好世界");
6008 assert!(source.is_char_boundary(start));
6009 assert!(source.is_char_boundary(end));
6010 }
6011
6012 #[test]
6013 fn test_heredoc_with_comments_inside() {
6014 let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6016 let mut lexer = Lexer::new(source);
6017
6018 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6019 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6020 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6021
6022 let heredoc = lexer.read_heredoc("EOF", false);
6023 assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6024
6025 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6028 let comment = lexer.next_lexed_token_with_comments().unwrap();
6029 assert_eq!(comment.kind, TokenKind::Comment);
6030 assert_eq!(
6031 token_text(&comment, lexer.input).as_deref(),
6032 Some(" real comment")
6033 );
6034 }
6035
6036 #[test]
6037 fn test_heredoc_with_hash_in_variable() {
6038 let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6040 let mut lexer = Lexer::new(source);
6041
6042 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6043 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6044 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6045
6046 let heredoc = lexer.read_heredoc("EOF", false);
6047 assert_eq!(heredoc.content, "val=${x#prefix}\n");
6048 }
6049
6050 #[test]
6051 fn test_heredoc_span_does_not_leak() {
6052 let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6055 let mut lexer = Lexer::new(source);
6056
6057 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6058 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6059 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6060
6061 let heredoc = lexer.read_heredoc("EOF", false);
6062 let start = heredoc.content_span.start.offset;
6063 let end = heredoc.content_span.end.offset;
6064 assert!(
6065 end <= source.len(),
6066 "heredoc span end ({end}) exceeds source length ({})",
6067 source.len()
6068 );
6069 assert_eq!(&source[start..end], "hello\nworld\n");
6070
6071 assert_next_token(&mut lexer, TokenKind::Newline, None);
6073 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6074 assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6075 }
6076
    #[test]
    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
        // A quoted delimiter (<<\_ACEOF) disables expansion inside the body;
        // the stray backtick in `configure' must not leak quoting state into
        // the backtick command substitutions on the lines after the heredoc.
        let source = "\
cat <<\\_ACEOF
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
_ACEOF
ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
";
        let mut lexer = Lexer::new(source);

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
        // The escaped delimiter keeps its backslash in the raw span.
        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(delimiter.kind, TokenKind::Word);
        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");

        let heredoc = lexer.read_heredoc("_ACEOF", false);
        assert_eq!(
            heredoc.content,
            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // First post-heredoc line: assignment whose value is one backtick
        // command substitution; both cooked text and raw span must round-trip.
        let first = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(first.kind, TokenKind::Word);
        assert_eq!(
            first.span.slice(source),
            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
        );
        let first_segments = first
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            first_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_dir_suffix=/".to_string(),
                    Some("ac_dir_suffix=/".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
                ),
            ]
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // Second post-heredoc line: same shape with a different sed program.
        let second = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(second.kind, TokenKind::Word);
        assert_eq!(
            second.span.slice(source),
            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
        );
        let second_segments = second
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            second_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_top_builddir_sub=".to_string(),
                    Some("ac_top_builddir_sub=".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
                    Some(
                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
                            .to_string(),
                    ),
                ),
            ]
        );
    }
6176
6177 #[test]
6178 fn test_heredoc_with_unicode_content() {
6179 let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6181 let mut lexer = Lexer::new(source);
6182
6183 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6184 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6185 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6186
6187 let heredoc = lexer.read_heredoc("EOF", false);
6188 assert_eq!(heredoc.content, "# 你好\ncafé\n");
6189 let start = heredoc.content_span.start.offset;
6190 let end = heredoc.content_span.end.offset;
6191 assert!(
6192 source.is_char_boundary(start),
6193 "heredoc span start ({start}) not on char boundary"
6194 );
6195 assert!(
6196 source.is_char_boundary(end),
6197 "heredoc span end ({end}) not on char boundary"
6198 );
6199 assert_eq!(&source[start..end], "# 你好\ncafé\n");
6200 }
6201
6202 #[test]
6203 fn test_assoc_compound_assignment() {
6204 let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6207 assert_next_token(
6208 &mut lexer,
6209 TokenKind::Word,
6210 Some(r#"m=([foo]="bar" [baz]="qux")"#),
6211 );
6212 assert!(lexer.next_lexed_token().is_none());
6213 }
6214
6215 #[test]
6216 fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6217 let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6218 let mut lexer = Lexer::new(source);
6219
6220 let token = lexer.next_lexed_token().unwrap();
6221 assert_eq!(token.kind, TokenKind::Word);
6222 assert_eq!(token.span.slice(source), source);
6223 assert!(lexer.next_lexed_token().is_none());
6224 }
6225
6226 #[test]
6227 fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6228 let source = r#"foo\_bar@(baz|qux)"#;
6229 let mut lexer = Lexer::new(source);
6230
6231 let token = lexer.next_lexed_token().unwrap();
6232 assert_eq!(token.kind, TokenKind::Word);
6233 assert_eq!(token.span.slice(source), source);
6234 assert!(lexer.next_lexed_token().is_none());
6235 }
6236
6237 #[test]
6238 fn test_indexed_array_not_collapsed() {
6239 let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6242 assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6243 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6244 }
6245
    #[test]
    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
        // `"$plugin_dir"/*(:t)` combines a double-quoted prefix, a glob path,
        // and a trailing zsh glob qualifier `(:t)`; it must lex as one Word.
        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);

        // Segment breakdown: quoted expansion, glob path, glob qualifier.
        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();
        assert_eq!(
            segments,
            vec![
                (
                    LexedWordSegmentKind::DoubleQuoted,
                    "$plugin_dir".to_string()
                ),
                (LexedWordSegmentKind::Plain, "/*".to_string()),
                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
            ]
        );

        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert!(lexer.next_lexed_token().is_none());
    }
6278
    #[test]
    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
        // A zsh glob qualifier `(Nm-1)` attached directly to a double-quoted
        // variable must not split the array element into separate tokens.
        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);

        // Segment breakdown: quoted expansion, then the qualifier as plain text.
        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();
        assert_eq!(
            segments,
            vec![
                (
                    LexedWordSegmentKind::DoubleQuoted,
                    "$__GREP_CACHE_FILE".to_string()
                ),
                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
            ]
        );

        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert!(lexer.next_lexed_token().is_none());
    }
6310
6311 #[test]
6312 fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6313 let source = r#"$dir/${~pats}(N)"#;
6314 let mut lexer = Lexer::new(source);
6315
6316 let token = lexer.next_lexed_token().unwrap();
6317 assert_eq!(token.kind, TokenKind::Word);
6318 assert_eq!(token.span.slice(source), source);
6319 assert!(lexer.next_lexed_token().is_none());
6320 }
6321
6322 #[test]
6323 fn test_dollar_word_does_not_absorb_function_parens() {
6324 let mut lexer = Lexer::new(r#"foo$x()"#);
6325
6326 assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6327 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6328 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6329 assert!(lexer.next_lexed_token().is_none());
6330 }
6331
6332 #[test]
6333 fn test_command_substitution_word_does_not_absorb_function_parens() {
6334 let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6335
6336 assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6337 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6338 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6339 assert!(lexer.next_lexed_token().is_none());
6340 }
6341
6342 #[test]
6345 fn test_digit_at_eof_no_panic() {
6346 let mut lexer = Lexer::new("2");
6348 let token = lexer.next_lexed_token();
6349 assert!(token.is_some());
6350 }
6351
6352 #[test]
6354 fn test_nested_brace_expansion_single_token() {
6355 let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6357 assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6358 assert!(lexer.next_lexed_token().is_none());
6360 }
6361
6362 #[test]
6364 fn test_simple_brace_expansion_unchanged() {
6365 let mut lexer = Lexer::new("${foo}");
6366 assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6367 assert!(lexer.next_lexed_token().is_none());
6368 }
6369
6370 #[test]
6371 fn test_nvm_fixture_lexes_without_stalling() {
6372 let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6373 let mut lexer = Lexer::new(input);
6374 let mut tokens = 0usize;
6375
6376 while lexer.next_lexed_token().is_some() {
6377 tokens += 1;
6378 assert!(
6379 tokens < 100_000,
6380 "lexer should continue making progress on the nvm fixture"
6381 );
6382 }
6383
6384 assert!(tokens > 0, "nvm fixture should produce at least one token");
6385 }
6386
    #[test]
    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
        // Each case arm mixes "..." with '...' inside a ${//} replacement; no
        // token other than Newline may span more than one source line, and the
        // `;;` terminators and `esac` must still surface as tokens.
        let input = concat!(
            "case \"${_input_type:-}\" in\n",
            " html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
            " org) _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
            "esac\n",
        );

        assert_non_newline_tokens_stay_on_one_line(input);

        let mut lexer = Lexer::new(input);
        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
            .map(|token| (token.kind, token_text(&token, input)))
            .collect::<Vec<_>>();
        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
    }
6405
6406 #[test]
6407 fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6408 let input = concat!(
6409 "case $2 in\n",
6410 " cygwin*) bin='cygwin32/bin' ;|\n",
6411 "esac\n",
6412 );
6413
6414 let mut lexer = Lexer::new(input);
6415 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6416 .map(|token| (token.kind, token_text(&token, input)))
6417 .collect::<Vec<_>>();
6418
6419 assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6420 assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6421 assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6422 }
6423
    #[test]
    fn test_inline_if_with_array_append_stays_line_local() {
        // `pyout+=(...)` and the quoted `${ln:+\n}` fallback must not drag any
        // non-newline token across the line break between the two branches.
        let input = concat!(
            "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
            "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
        );

        assert_non_newline_tokens_stay_on_one_line(input);
    }
6433
6434 #[test]
6435 fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6436 let source = "unsetopt interactive_comments\n#literal\n";
6437 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6438 let mut lexer = Lexer::with_profile(source, &profile);
6439
6440 assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6441 assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6442 assert_next_token(&mut lexer, TokenKind::Newline, None);
6443 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6444 }
6445
6446 #[test]
6447 fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6448 let source = "setopt rc_quotes\nprint 'a''b'\n";
6449 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6450 let mut lexer = Lexer::with_profile(source, &profile);
6451
6452 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6453 assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6454 assert_next_token(&mut lexer, TokenKind::Newline, None);
6455 assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6456 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6457 }
6458
6459 #[test]
6460 fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6461 let source = "setopt ignore_braces\n{ echo }\n";
6462 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6463 let mut lexer = Lexer::with_profile(source, &profile);
6464
6465 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6466 assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6467 assert_next_token(&mut lexer, TokenKind::Newline, None);
6468 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6469 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6470 assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6471 }
6472
    #[test]
    fn test_heredoc_in_arithmetic_fuzz_crash() {
        // Regression fixture from fuzzing (see test name): arithmetic input
        // containing `<<` sequences that previously crashed the parser.
        // The bytes form valid UTF-8 (including NUL and other control bytes)
        // and are kept verbatim so the original crash input is reproduced.
        let data: &[u8] = &[
            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
        ];
        let input = std::str::from_utf8(data).unwrap();
        // Embed the fuzz payload in an arithmetic expansion, as in the crash.
        let script = format!("echo $(({input}))\n");
        // Parsing must terminate without panicking; the result is irrelevant.
        let _ = crate::parser::Parser::new(&script).parse();
    }
6504}