1use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact per-token bit-flag set carried by every [`LexedToken`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit raised when any of the token's text is owned (cooked) rather than
    /// sliced straight out of the source.
    const COOKED_TEXT: u8 = 0b0000_0001;
    /// Bit raised when the token was injected by the lexer rather than read
    /// directly from the source.
    const SYNTHETIC: u8 = 0b0000_0010;

    /// A flag set with no bits raised.
    const fn empty() -> Self {
        Self(0)
    }

    /// A flag set with only the cooked-text bit raised.
    const fn cooked_text() -> Self {
        Self(Self::COOKED_TEXT)
    }

    /// Copy of `self` with the synthetic bit additionally raised.
    pub(crate) const fn with_synthetic(self) -> Self {
        Self(self.0 | Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is raised.
    pub(crate) const fn has_cooked_text(self) -> bool {
        (self.0 & Self::COOKED_TEXT) == Self::COOKED_TEXT
    }

    /// Whether the synthetic bit is raised.
    pub(crate) const fn is_synthetic(self) -> bool {
        (self.0 & Self::SYNTHETIC) == Self::SYNTHETIC
    }
}
40
/// Backing storage for a token's text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenText<'a> {
    /// Zero-copy slice borrowed from the lexer's input.
    Borrowed(&'a str),
    /// Byte range into a reference-counted copy of the source.
    Shared {
        source: Arc<str>,
        range: Range<usize>,
    },
    /// Text materialized during lexing rather than sliced from the source.
    Owned(String),
}

impl TokenText<'_> {
    /// Returns the text regardless of how it is stored.
    pub(crate) fn as_str(&self) -> &str {
        match self {
            Self::Borrowed(text) => text,
            Self::Shared { source, range } => &source[range.clone()],
            Self::Owned(text) => text,
        }
    }

    /// Detaches the text from the `'a` input lifetime by copying borrowed
    /// slices into owned strings; `Shared` and `Owned` move unchanged.
    fn into_owned<'a>(self) -> TokenText<'a> {
        match self {
            Self::Borrowed(text) => TokenText::Owned(text.to_string()),
            Self::Shared { source, range } => TokenText::Shared { source, range },
            Self::Owned(text) => TokenText::Owned(text),
        }
    }

    /// Detaches the text from the `'a` input lifetime without copying when
    /// possible: a borrowed slice becomes a `Shared` range into `source` when
    /// `span` is present and ends within `source`, and falls back to an owned
    /// copy otherwise.
    ///
    /// NOTE(review): assumes an in-bounds `span` addresses the same bytes as
    /// the borrowed text — confirm at call sites.
    fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
        match self {
            Self::Borrowed(text) => span
                .filter(|span| span.end.offset <= source.len())
                .map_or_else(
                    || TokenText::Owned(text.to_string()),
                    |span| TokenText::Shared {
                        source: Arc::clone(source),
                        range: span.start.offset..span.end.offset,
                    },
                ),
            Self::Shared { source, range } => TokenText::Shared { source, range },
            Self::Owned(text) => TokenText::Owned(text),
        }
    }
}
84
/// Quoting context a word segment was lexed from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexedWordSegmentKind {
    /// Unquoted text.
    Plain,
    /// Text inside `'...'`.
    SingleQuoted,
    /// Text inside `$'...'`.
    DollarSingleQuoted,
    /// Text inside `"..."`.
    DoubleQuoted,
    /// Text inside `$"..."`.
    DollarDoubleQuoted,
    /// Mixed or otherwise unclassified quoting.
    Composite,
}
101
/// One contiguous piece of a word, with a uniform quoting context.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWordSegment<'a> {
    kind: LexedWordSegmentKind,
    text: TokenText<'a>,
    // Span of the segment's content text, when it maps to the source.
    span: Option<Span>,
    // Span including surrounding quote characters; `None` falls back to `span`.
    wrapper_span: Option<Span>,
}

impl<'a> LexedWordSegment<'a> {
    /// Source-backed segment; the wrapper span defaults to the content span.
    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
        Self {
            kind,
            text: TokenText::Borrowed(text),
            span,
            wrapper_span: span,
        }
    }

    /// Source-backed segment with distinct content and wrapper spans.
    fn borrowed_with_spans(
        kind: LexedWordSegmentKind,
        text: &'a str,
        span: Option<Span>,
        wrapper_span: Option<Span>,
    ) -> Self {
        Self {
            kind,
            text: TokenText::Borrowed(text),
            span,
            wrapper_span,
        }
    }

    /// Segment carrying cooked (owned) text with no source spans.
    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
        Self {
            kind,
            text: TokenText::Owned(text),
            span: None,
            wrapper_span: None,
        }
    }

    /// Segment carrying cooked (owned) text but still tied to source spans.
    fn owned_with_spans(
        kind: LexedWordSegmentKind,
        text: String,
        span: Option<Span>,
        wrapper_span: Option<Span>,
    ) -> Self {
        Self {
            kind,
            text: TokenText::Owned(text),
            span,
            wrapper_span,
        }
    }

    /// The segment's text.
    pub fn as_str(&self) -> &str {
        self.text.as_str()
    }

    /// Whether the text is a slice of (a copy of) the source, as opposed to
    /// cooked owned text.
    pub(crate) const fn text_is_source_backed(&self) -> bool {
        matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
    }

    /// Quoting context of this segment.
    pub const fn kind(&self) -> LexedWordSegmentKind {
        self.kind
    }

    /// Span of the content text, when source-mapped.
    pub const fn span(&self) -> Option<Span> {
        self.span
    }

    /// Span including quote characters, falling back to the content span.
    pub fn wrapper_span(&self) -> Option<Span> {
        self.wrapper_span.or(self.span)
    }

    /// Shifts both spans by `base`.
    fn rebased(mut self, base: Position) -> Self {
        self.span = self.span.map(|span| span.rebased(base));
        self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
        self
    }

    /// Detaches from the input lifetime by copying borrowed text.
    fn into_owned<'b>(self) -> LexedWordSegment<'b> {
        LexedWordSegment {
            kind: self.kind,
            text: self.text.into_owned(),
            span: self.span,
            wrapper_span: self.wrapper_span,
        }
    }

    /// Detaches from the input lifetime, re-pointing borrowed text into the
    /// shared `source` when the content span allows.
    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
        LexedWordSegment {
            kind: self.kind,
            text: self.text.into_shared(source, self.span),
            span: self.span,
            wrapper_span: self.wrapper_span,
        }
    }
}
206
/// A word token's text: one primary segment plus any adjacent trailing
/// segments (e.g. differently-quoted pieces joined without whitespace).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWord<'a> {
    primary_segment: LexedWordSegment<'a>,
    trailing_segments: Vec<LexedWordSegment<'a>>,
}

impl<'a> LexedWord<'a> {
    /// Word consisting of just `primary_segment`.
    fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
        Self {
            primary_segment,
            trailing_segments: Vec::new(),
        }
    }

    /// Single-segment word borrowing text from the source.
    fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
        Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
    }

    /// Single-segment word carrying cooked (owned) text.
    fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
        Self::from_segment(LexedWordSegment::owned(kind, text))
    }

    /// Appends a trailing segment.
    fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
        self.trailing_segments.push(segment);
    }

    /// All segments, primary first.
    pub fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
        std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
    }

    /// The word's text when it has exactly one segment, `None` otherwise.
    pub fn text(&self) -> Option<&str> {
        self.single_segment().map(LexedWordSegment::as_str)
    }

    /// Concatenation of every segment's text.
    pub fn joined_text(&self) -> String {
        let mut text = String::new();
        for segment in self.segments() {
            text.push_str(segment.as_str());
        }
        text
    }

    /// The sole segment when there are no trailing segments.
    pub fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
        self.trailing_segments
            .is_empty()
            .then_some(&self.primary_segment)
    }

    /// Whether any segment carries cooked (owned) text.
    fn has_cooked_text(&self) -> bool {
        self.segments()
            .any(|segment| matches!(segment.text, TokenText::Owned(_)))
    }

    /// Shifts all segment spans by `base`.
    fn rebased(mut self, base: Position) -> Self {
        self.primary_segment = self.primary_segment.rebased(base);
        self.trailing_segments = self
            .trailing_segments
            .into_iter()
            .map(|segment| segment.rebased(base))
            .collect();
        self
    }

    /// Detaches every segment from the input lifetime by copying.
    fn into_owned<'b>(self) -> LexedWord<'b> {
        LexedWord {
            primary_segment: self.primary_segment.into_owned(),
            trailing_segments: self
                .trailing_segments
                .into_iter()
                .map(LexedWordSegment::into_owned)
                .collect(),
        }
    }

    /// Detaches every segment from the input lifetime via the shared source.
    fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
        LexedWord {
            primary_segment: self.primary_segment.into_shared(source),
            trailing_segments: self
                .trailing_segments
                .into_iter()
                .map(|segment| segment.into_shared(source))
                .collect(),
        }
    }
}
297
/// Recoverable lexing failures, surfaced as `TokenKind::Error` tokens.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    /// A `$(...)` substitution that was never closed.
    CommandSubstitution,
    /// A backtick substitution that was never closed.
    BacktickSubstitution,
    /// A single-quoted string that was never closed.
    SingleQuote,
    /// A double-quoted string that was never closed.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Human-readable diagnostic message for this error kind.
    pub const fn message(self) -> &'static str {
        match self {
            Self::SingleQuote => "unterminated single quote",
            Self::DoubleQuote => "unterminated double quote",
            Self::CommandSubstitution => "unterminated command substitution",
            Self::BacktickSubstitution => "unterminated backtick substitution",
        }
    }
}
322
/// Kind-specific data carried by a token.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// Punctuation/operator tokens carry nothing.
    None,
    /// Word-like tokens carry their (possibly multi-segment) text.
    Word(LexedWord<'a>),
    /// A single file-descriptor number.
    Fd(i32),
    /// A source/destination file-descriptor pair.
    FdPair(i32, i32),
    /// A recoverable lexing error.
    Error(LexerErrorKind),
}

/// One lexed token: kind, source span, flags, and kind-specific payload.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedToken<'a> {
    pub kind: TokenKind,
    pub span: Span,
    pub(crate) flags: TokenFlags,
    payload: TokenPayload<'a>,
}
342
impl<'a> LexedToken<'a> {
    /// Maps a word-like token kind to the quoting kind of its single segment.
    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
        match kind {
            TokenKind::Word => LexedWordSegmentKind::Plain,
            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
            _ => LexedWordSegmentKind::Composite,
        }
    }

    /// Payload-less token; the span is a placeholder until `with_span`.
    pub(crate) fn punctuation(kind: TokenKind) -> Self {
        Self {
            kind,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::None,
        }
    }

    /// Word token; raises the cooked-text flag when any segment owns its text.
    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
        let flags = if word.has_cooked_text() {
            TokenFlags::cooked_text()
        } else {
            TokenFlags::empty()
        };

        Self {
            kind,
            span: Span::new(),
            flags,
            payload: TokenPayload::Word(word),
        }
    }

    /// Word token whose single segment borrows directly from the source.
    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
        Self::with_word_payload(
            kind,
            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
        )
    }

    /// Word token whose single segment owns its text.
    fn owned_word(kind: TokenKind, text: String) -> Self {
        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
    }

    /// Comment token (no payload).
    fn comment() -> Self {
        Self {
            kind: TokenKind::Comment,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::None,
        }
    }

    /// Token carrying one file-descriptor number.
    fn fd(kind: TokenKind, fd: i32) -> Self {
        Self {
            kind,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::Fd(fd),
        }
    }

    /// Token carrying a source/destination descriptor pair.
    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
        Self {
            kind,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::FdPair(src_fd, dst_fd),
        }
    }

    /// Error token wrapping a recoverable lexer error.
    fn error(kind: LexerErrorKind) -> Self {
        Self {
            kind: TokenKind::Error,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::Error(kind),
        }
    }

    /// Sets the token's source span.
    pub(crate) fn with_span(mut self, span: Span) -> Self {
        self.span = span;
        self
    }

    /// Shifts the token's span (and any word payload spans) by `base`.
    pub(crate) fn rebased(mut self, base: Position) -> Self {
        self.span = self.span.rebased(base);
        self.payload = match self.payload {
            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
            payload => payload,
        };
        self
    }

    /// Marks the token as synthesized by the lexer.
    pub(crate) fn with_synthetic_flag(mut self) -> Self {
        self.flags = self.flags.with_synthetic();
        self
    }

    /// Detaches the token from the input lifetime by copying borrowed text.
    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
        let payload = match self.payload {
            TokenPayload::None => TokenPayload::None,
            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
            TokenPayload::Error(kind) => TokenPayload::Error(kind),
        };

        LexedToken {
            kind: self.kind,
            span: self.span,
            flags: self.flags,
            payload,
        }
    }

    /// Detaches the token from the input lifetime, re-pointing borrowed text
    /// into the shared `source` where spans allow.
    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
        let payload = match self.payload {
            TokenPayload::None => TokenPayload::None,
            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
            TokenPayload::Error(kind) => TokenPayload::Error(kind),
        };

        LexedToken {
            kind: self.kind,
            span: self.span,
            flags: self.flags,
            payload,
        }
    }

    /// Single-segment text of a word-like token, if any.
    pub fn word_text(&self) -> Option<&str> {
        self.kind
            .is_word_like()
            .then_some(())
            .and_then(|_| match &self.payload {
                TokenPayload::Word(word) => word.text(),
                _ => None,
            })
    }

    /// Concatenated text of all segments of a word-like token.
    pub fn word_string(&self) -> Option<String> {
        self.kind
            .is_word_like()
            .then_some(())
            .and_then(|_| match &self.payload {
                TokenPayload::Word(word) => Some(word.joined_text()),
                _ => None,
            })
    }

    /// The word payload, if this token has one.
    pub fn word(&self) -> Option<&LexedWord<'a>> {
        match &self.payload {
            TokenPayload::Word(word) => Some(word),
            _ => None,
        }
    }

    /// Raw source slice for a word-like token. Returns `None` for cooked or
    /// synthetic tokens (their spans do not reflect the literal text) and for
    /// out-of-bounds spans.
    pub fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
            return None;
        }

        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
            .then(|| &source[self.span.start.offset..self.span.end.offset])
    }

    /// File descriptor for `Fd` payloads.
    pub fn fd_value(&self) -> Option<i32> {
        match self.payload {
            TokenPayload::Fd(fd) => Some(fd),
            _ => None,
        }
    }

    /// Descriptor pair for `FdPair` payloads.
    pub fn fd_pair_value(&self) -> Option<(i32, i32)> {
        match self.payload {
            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
            _ => None,
        }
    }

    /// Error kind for `Error` payloads.
    pub fn error_kind(&self) -> Option<LexerErrorKind> {
        match self.payload {
            TokenPayload::Error(kind) => Some(kind),
            _ => None,
        }
    }
}
541
/// A fully-read heredoc body.
#[derive(Debug, Clone, PartialEq)]
pub struct HeredocRead {
    /// The heredoc body text.
    pub content: String,
    /// Source span covered by the body.
    pub content_span: Span,
}

/// Default cap on nested substitution depth.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
554
555#[derive(Clone, Debug)]
556struct Cursor<'a> {
557 rest: &'a str,
558}
559
560impl<'a> Cursor<'a> {
561 fn new(source: &'a str) -> Self {
562 Self { rest: source }
563 }
564
565 fn first(&self) -> Option<char> {
566 self.rest.chars().next()
567 }
568
569 fn second(&self) -> Option<char> {
570 let mut chars = self.rest.chars();
571 chars.next()?;
572 chars.next()
573 }
574
575 fn third(&self) -> Option<char> {
576 let mut chars = self.rest.chars();
577 chars.next()?;
578 chars.next()?;
579 chars.next()
580 }
581
582 fn bump(&mut self) -> Option<char> {
583 let ch = self.first()?;
584 self.rest = &self.rest[ch.len_utf8()..];
585 Some(ch)
586 }
587
588 fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
589 let start = self.rest;
590 let mut end = 0;
591
592 for ch in start.chars() {
593 if !predicate(ch) {
594 break;
595 }
596 end += ch.len_utf8();
597 }
598
599 self.rest = &start[end..];
600 &start[..end]
601 }
602
603 fn rest(&self) -> &'a str {
604 self.rest
605 }
606
607 fn skip_bytes(&mut self, count: usize) {
608 self.rest = &self.rest[count..];
609 }
610
611 fn find_byte(&self, byte: u8) -> Option<usize> {
612 memchr(byte, self.rest.as_bytes())
613 }
614}
615
/// Incremental byte-offset → `Position` (line/column) translator with a
/// forward-moving cache for the common sequential-query pattern.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    source: &'a str,
    // Byte offsets at which each line starts; always contains 0 first.
    line_starts: Arc<[usize]>,
    // Most recently computed position, reused to answer forward queries
    // without re-scanning from the line start.
    cached: Position,
}

/// Instrumentation counters gathered only under the `benchmarking` feature.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    // Number of `Lexer::current_position` calls observed.
    pub(crate) current_position_calls: u64,
}
628
impl<'a> PositionMap<'a> {
    /// Builds the line-start table for `source`. The newline count is taken
    /// first so the vector allocates exactly once.
    fn new(source: &'a str) -> Self {
        let mut line_starts =
            Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
        line_starts.push(0);
        line_starts.extend(
            source
                .bytes()
                .enumerate()
                .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
        );

        Self {
            source,
            line_starts: line_starts.into(),
            cached: Position::new(),
        }
    }

    /// Position for `offset`, updating the cache. Forward queries advance
    /// incrementally from the cached position; backward or out-of-range
    /// queries fall back to the table lookup.
    fn position(&mut self, offset: usize) -> Position {
        if offset == self.cached.offset {
            return self.cached;
        }

        let position = if offset > self.cached.offset && offset <= self.source.len() {
            Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
        } else {
            self.position_uncached(offset)
        };
        self.cached = position;
        position
    }

    /// Position for `offset` (clamped to the source length) via binary search
    /// over the line-start table; does not touch the cache.
    fn position_uncached(&self, offset: usize) -> Position {
        let offset = offset.min(self.source.len());
        let line_index = self
            .line_starts
            .partition_point(|start| *start <= offset)
            .saturating_sub(1);
        let line_start = self.line_starts[line_index];
        let line_text = &self.source[line_start..offset];
        // ASCII fast path: the column is just the byte length plus one.
        let column = if line_text.is_ascii() {
            line_text.len() + 1
        } else {
            line_text.chars().count() + 1
        };

        Position {
            line: line_index + 1,
            column,
            offset,
        }
    }

    /// Advances `position` across `text`, counting newlines and recomputing
    /// the column from the tail after the last newline.
    fn advance_from(mut position: Position, text: &str) -> Position {
        position.offset += text.len();
        let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
        if newline_count == 0 {
            position.column += if text.is_ascii() {
                text.len()
            } else {
                text.chars().count()
            };
            return position;
        }

        position.line += newline_count;
        // `newline_count > 0` here, so `memrchr` always finds a newline; the
        // `unwrap_or_default` is just a defensive fallback.
        let tail_start = memrchr(b'\n', text.as_bytes())
            .map(|index| index + 1)
            .unwrap_or_default();
        let tail = &text[tail_start..];
        position.column = if tail.is_ascii() {
            tail.len() + 1
        } else {
            tail.chars().count() + 1
        };
        position
    }
}
708
/// Streaming shell tokenizer over a borrowed source string.
#[derive(Clone)]
pub struct Lexer<'a> {
    // Full original source; also sliced for borrowed token text.
    #[allow(dead_code)] input: &'a str,
    // Current byte offset into `input`.
    offset: usize,
    // View over the not-yet-consumed tail of `input`.
    cursor: Cursor<'a>,
    // Incremental byte-offset → line/column translator.
    position_map: PositionMap<'a>,
    // Characters pushed back in front of the cursor; drained before the
    // cursor is read again.
    reinject_buf: VecDeque<char>,
    // Offset to restore once `reinject_buf` has been drained.
    reinject_resume_offset: Option<usize>,
    // Recursion cap for nested substitutions.
    max_subst_depth: usize,
    // zsh option state in effect at the start of the input, if any.
    initial_zsh_options: Option<ZshOptionState>,
    // Precomputed option-state changes along the input (zsh dialect only).
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    // Index of the next not-yet-applied timeline entry.
    zsh_timeline_index: usize,
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
731
732impl<'a> Lexer<'a> {
    /// Creates a lexer with defaults: bash dialect and the default
    /// substitution-depth limit.
    pub fn new(input: &'a str) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }

    /// Like [`Lexer::new`], but with a custom substitution-depth limit.
    pub fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            max_depth,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }

    /// Creates a lexer for a specific shell profile; for zsh, a timeline of
    /// option-state changes is precomputed from the input.
    pub fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
            .then(|| ZshOptionTimeline::build(input, shell_profile))
            .flatten()
            .map(Arc::new);
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            shell_profile,
            zsh_timeline,
        )
    }

    /// Fully-parameterized constructor used by all public constructors.
    pub(crate) fn with_max_subst_depth_and_profile(
        input: &'a str,
        max_depth: usize,
        shell_profile: &ShellProfile,
        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    ) -> Self {
        Self {
            input,
            offset: 0,
            cursor: Cursor::new(input),
            position_map: PositionMap::new(input),
            reinject_buf: VecDeque::new(),
            reinject_resume_offset: None,
            max_subst_depth: max_depth,
            initial_zsh_options: shell_profile.zsh_options().cloned(),
            zsh_timeline,
            zsh_timeline_index: 0,
            #[cfg(feature = "benchmarking")]
            benchmark_counters: None,
        }
    }
789
    /// Line/column position at the current offset, computed without the
    /// internal cache so it works through `&self`.
    pub fn position(&self) -> Position {
        self.position_map.position_uncached(self.offset)
    }

    /// Cached position lookup used on the hot lexing path.
    fn current_position(&mut self) -> Position {
        #[cfg(feature = "benchmarking")]
        self.maybe_record_current_position_call();
        self.position_map.position(self.offset)
    }

    /// Turns on benchmark counter collection.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn enable_benchmark_counters(&mut self) {
        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
    }

    /// Counters collected so far (zeroed when collection was never enabled).
    #[cfg(feature = "benchmarking")]
    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
        self.benchmark_counters.unwrap_or_default()
    }

    /// Records one `current_position` call when counters are enabled.
    #[cfg(feature = "benchmarking")]
    fn maybe_record_current_position_call(&mut self) {
        if let Some(counters) = &mut self.benchmark_counters {
            counters.current_position_calls += 1;
        }
    }
817
    /// Once the reinjection buffer is drained, restores `offset` to the
    /// recorded resume position so it tracks the cursor again.
    /// NOTE(review): `reinject_resume_offset` is recorded by code outside
    /// this view — confirm the pairing when modifying reinjection.
    fn sync_offset_to_cursor(&mut self) {
        if self.reinject_buf.is_empty()
            && let Some(offset) = self.reinject_resume_offset.take()
        {
            self.offset = offset;
        }
    }

    /// Convenience wrapper returning only the kind of the next token.
    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
        self.next_lexed_token().map(|token| token.kind)
    }
831
    /// Peeks the next character, preferring reinjected characters over the
    /// source cursor.
    fn peek_char(&mut self) -> Option<char> {
        self.sync_offset_to_cursor();
        if let Some(&ch) = self.reinject_buf.front() {
            Some(ch)
        } else {
            self.cursor.first()
        }
    }

    /// Consumes one character (reinjected first, then source). `offset`
    /// advances by the character's UTF-8 width even while draining the
    /// reinjection buffer.
    fn advance(&mut self) -> Option<char> {
        self.sync_offset_to_cursor();
        let ch = if !self.reinject_buf.is_empty() {
            self.reinject_buf.pop_front()
        } else {
            self.cursor.bump()
        };
        if let Some(c) = ch {
            self.offset += c.len_utf8();
        }
        ch
    }
853
    /// All upcoming characters: reinjected ones first, then the source tail.
    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
        self.reinject_buf
            .iter()
            .copied()
            .chain(self.cursor.rest().chars())
    }

    /// Second upcoming character, accounting for however many lookahead
    /// characters sit in the reinjection buffer.
    fn second_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.second(),
            1 => self.cursor.first(),
            _ => self.reinject_buf.get(1).copied(),
        }
    }

    /// Third upcoming character (reinjected characters included).
    fn third_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.third(),
            1 => self.cursor.second(),
            2 => self.cursor.first(),
            _ => self.reinject_buf.get(2).copied(),
        }
    }

    /// Fourth upcoming character (reinjected characters included).
    fn fourth_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.rest().chars().nth(3),
            1 => self.cursor.third(),
            2 => self.cursor.second(),
            3 => self.cursor.first(),
            _ => self.reinject_buf.get(3).copied(),
        }
    }
887
    /// Consumes `byte_len` bytes straight from the source cursor. Only valid
    /// while no reinjected characters are pending.
    fn consume_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.sync_offset_to_cursor();
        self.offset += byte_len;
        self.cursor.skip_bytes(byte_len);
    }

    /// Advances `offset` past bytes that were already scanned.
    /// NOTE(review): deliberately leaves the cursor untouched — callers
    /// appear to advance it themselves; confirm at call sites.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }

    /// Consumes `count` ASCII characters, taking the byte fast path when no
    /// reinjected characters are pending.
    fn consume_ascii_chars(&mut self, count: usize) {
        if self.reinject_buf.is_empty() {
            self.consume_source_bytes(count);
            return;
        }

        for _ in 0..count {
            self.advance();
        }
    }
910
    /// Byte length of the run of spaces/tabs at the cursor.
    fn source_horizontal_whitespace_len(&self) -> usize {
        self.cursor
            .rest()
            .as_bytes()
            .iter()
            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
            .count()
    }

    /// Byte length of the run of plain-word ASCII bytes at the cursor.
    fn source_ascii_plain_word_len(&self) -> usize {
        self.cursor
            .rest()
            .as_bytes()
            .iter()
            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
            .count()
    }

    /// Index of the first byte that is special inside double quotes
    /// (`"`, `\`, `$`, or a backtick).
    fn find_double_quote_special(source: &str) -> Option<usize> {
        source
            .as_bytes()
            .iter()
            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
    }
935
936 fn ensure_capture_from_source(
937 &self,
938 capture: &mut Option<String>,
939 start: Position,
940 end: Position,
941 ) {
942 if capture.is_none() {
943 *capture = Some(self.input[start.offset..end.offset].to_string());
944 }
945 }
946
947 fn push_capture_char(capture: &mut Option<String>, ch: char) {
948 if let Some(text) = capture.as_mut() {
949 text.push(ch);
950 }
951 }
952
953 fn push_capture_str(capture: &mut Option<String>, text: &str) {
954 if let Some(current) = capture.as_mut() {
955 current.push_str(text);
956 }
957 }
958
    /// zsh option state in effect at the current offset. Walks the
    /// precomputed timeline forward past every entry at or before `offset`;
    /// before the first entry (or without a timeline) the initial options
    /// apply.
    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
        if let Some(timeline) = self.zsh_timeline.as_ref() {
            while self.zsh_timeline_index < timeline.entries.len()
                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
            {
                self.zsh_timeline_index += 1;
            }
            return if self.zsh_timeline_index == 0 {
                self.initial_zsh_options.as_ref()
            } else {
                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
            };
        }

        self.initial_zsh_options.as_ref()
    }

    /// `#` comments are recognized unless `interactive_comments` is known to
    /// be off.
    fn comments_enabled(&mut self) -> bool {
        !self
            .current_zsh_options()
            .is_some_and(|options| options.interactive_comments.is_definitely_off())
    }

    /// Whether the zsh `rc_quotes` option is known to be on.
    fn rc_quotes_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.rc_quotes.is_definitely_on())
    }

    /// Whether the zsh `ignore_braces` option is known to be on.
    fn ignore_braces_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.ignore_braces.is_definitely_on())
    }

    /// Whether `ignore_braces` or `ignore_close_braces` is known to be on.
    fn ignore_close_braces_enabled(&mut self) -> bool {
        self.current_zsh_options().is_some_and(|options| {
            options.ignore_braces.is_definitely_on()
                || options.ignore_close_braces.is_definitely_on()
        })
    }
998
    /// Decides whether a `#` at the current offset starts a word instead of a
    /// comment: always when comments are disabled; otherwise when it directly
    /// follows a non-delimiter character or sits inside an unclosed `((` on
    /// this line. Requires an empty reinjection buffer so `offset` maps to
    /// real source text.
    fn should_treat_hash_as_word_char(&mut self) -> bool {
        if !self.comments_enabled() {
            return true;
        }
        self.reinject_buf.is_empty()
            && (self
                .input
                .get(..self.offset)
                .and_then(|prefix| prefix.chars().next_back())
                .is_some_and(|prev| {
                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
                })
                || self.is_inside_unclosed_double_paren_on_line())
    }

    /// Text of the word being read: the cooked capture when one is active,
    /// otherwise the raw source between `start` and the current offset.
    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
        capture
            .as_deref()
            .unwrap_or(&self.input[start.offset..self.offset])
    }

    /// Whether the word surface consists of exactly the single character
    /// `target`. NUL bytes are skipped — they appear to be internal
    /// placeholders in cooked text (TODO confirm).
    fn current_word_surface_is_single_char(
        &self,
        start: Position,
        capture: &Option<String>,
        target: char,
    ) -> bool {
        let text = self.current_word_text(start, capture);
        // Fast path: no placeholder NULs, compare directly.
        if !text.contains('\x00') {
            let mut encoded = [0; 4];
            return text == target.encode_utf8(&mut encoded);
        }

        let mut chars = text.chars().filter(|&ch| ch != '\x00');
        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
    }

    /// Last non-NUL character of the word surface.
    fn current_word_surface_last_char<'b>(
        &'b self,
        start: Position,
        capture: &'b Option<String>,
    ) -> Option<char> {
        self.current_word_text(start, capture)
            .chars()
            .rev()
            .find(|&ch| ch != '\x00')
    }

    /// Whether the word surface ends with `target` (NULs ignored).
    fn current_word_surface_ends_with_char(
        &self,
        start: Position,
        capture: &Option<String>,
        target: char,
    ) -> bool {
        self.current_word_surface_last_char(start, capture) == Some(target)
    }

    /// Whether the word surface ends with an extglob prefix character
    /// (`@`, `?`, `*`, `+`, or `!`).
    fn current_word_surface_ends_with_extglob_prefix(
        &self,
        start: Position,
        capture: &Option<String>,
    ) -> bool {
        self.current_word_surface_last_char(start, capture)
            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
    }
1064
1065 pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1067 self.skip_whitespace();
1068 let start = self.current_position();
1069 let token = self.next_lexed_token_inner(false)?;
1070 let end = self.current_position();
1071 Some(token.with_span(Span::from_positions(start, end)))
1072 }
1073
1074 pub fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1076 self.skip_whitespace();
1077 let start = self.current_position();
1078 let token = self.next_lexed_token_inner(true)?;
1079 let end = self.current_position();
1080 Some(token.with_span(Span::from_positions(start, end)))
1081 }
1082
    /// Dispatches on the next character to lex exactly one token. Assumes the
    /// caller has already skipped leading whitespace; span assignment also
    /// happens in the caller. With `preserve_comments` false, comments are
    /// skipped and lexing recurses to produce the following token instead.
    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
        let ch = self.peek_char()?;

        match ch {
            '\n' => {
                self.consume_ascii_chars(1);
                Some(LexedToken::punctuation(TokenKind::Newline))
            }
            // `;;&` / `;;` / `;|` / `;&` / `;` — longest operator first.
            ';' => {
                if self.second_char() == Some(';') {
                    if self.third_char() == Some('&') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiPipe))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiAmp))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Semicolon))
                }
            }
            // `||` / `|&` / `|`.
            '|' => {
                if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Or))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Pipe))
                }
            }
            // `&&` / `&>>` / `&>` / `&|` / `&!` / `&`.
            '&' => {
                if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::And))
                } else if self.second_char() == Some('>') {
                    if self.third_char() == Some('>') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
                } else if self.second_char() == Some('!') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Background))
                }
            }
            // `>>|`/`>>` (both RedirectAppend) / `>|` / `>(` / `>&` / `>`.
            '>' => {
                if self.second_char() == Some('>') {
                    if self.third_char() == Some('|') {
                        self.consume_ascii_chars(3);
                    } else {
                        self.consume_ascii_chars(2);
                    }
                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Clobber))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupOutput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
                }
            }
            // `<<<` / `<<-` / `<<` / `<>` / `<(` / `<&` / `<`.
            '<' => {
                if self.second_char() == Some('<') {
                    if self.third_char() == Some('<') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereString))
                    } else if self.third_char() == Some('-') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::HereDoc))
                    }
                } else if self.second_char() == Some('>') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupInput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
                }
            }
            '(' => {
                if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::LeftParen))
                }
            }
            ')' => {
                if self.second_char() == Some(')') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RightParen))
                }
            }
            // `{`: word char under zsh IGNORE_BRACES, else brace expansion,
            // brace group `{`, or a literal word starting with `{`.
            '{' => {
                let start = self.current_position();
                if self.ignore_braces_enabled() {
                    self.consume_ascii_chars(1);
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
                        }
                        _ => self.read_word_starting_with("{", start),
                    }
                } else if self.looks_like_brace_expansion() {
                    self.read_brace_expansion_word()
                } else if self.is_brace_group_start() {
                    self.advance();
                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
                } else if self.brace_literal_starts_case_pattern_delimiter() {
                    self.read_word_starting_with("{", start)
                } else {
                    self.read_brace_literal_word()
                }
            }
            // `}`: word under zsh IGNORE(_CLOSE)_BRACES, else punctuation.
            '}' => {
                self.consume_ascii_chars(1);
                if self.ignore_close_braces_enabled() {
                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
                } else {
                    Some(LexedToken::punctuation(TokenKind::RightBrace))
                }
            }
            // `[[` only counts when followed by whitespace/EOF; a lone `[` is
            // the test builtin word or the start of a larger word.
            '[' => {
                let start = self.current_position();
                self.consume_ascii_chars(1);
                if self.peek_char() == Some('[')
                    && matches!(
                        self.second_char(),
                        Some(' ') | Some('\t') | Some('\n') | None
                    )
                {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
                } else {
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
                        }
                        _ => self.read_word_starting_with("[", start),
                    }
                }
            }
            ']' => {
                if self.second_char() == Some(']') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
                }
            }
            '\'' => self.read_single_quoted_string(),
            '"' => self.read_double_quoted_string(),
            // `#`: word character in some contexts, otherwise a comment that
            // is either preserved or skipped.
            '#' => {
                if self.should_treat_hash_as_word_char() {
                    let start = self.current_position();
                    return self.read_word_starting_with("#", start);
                }
                if preserve_comments {
                    self.read_comment();
                    Some(LexedToken::comment())
                } else {
                    self.skip_comment();
                    self.next_lexed_token_inner(false)
                }
            }
            // Digits may begin an fd-prefixed redirection (e.g. `2>`).
            '0'..='9' => self.read_word_or_fd_redirect(),
            _ => self.read_word(),
        }
    }
1300
1301 fn skip_whitespace(&mut self) {
1302 while let Some(ch) = self.peek_char() {
1303 if self.reinject_buf.is_empty() {
1304 let whitespace_len = self.source_horizontal_whitespace_len();
1305 if whitespace_len > 0 {
1306 self.consume_source_bytes(whitespace_len);
1307 continue;
1308 }
1309
1310 if self.cursor.rest().starts_with("\\\n") {
1311 self.consume_source_bytes(2);
1312 continue;
1313 }
1314 }
1315
1316 if ch == ' ' || ch == '\t' {
1317 self.consume_ascii_chars(1);
1318 } else if ch == '\\' {
1319 if self.second_char() == Some('\n') {
1321 self.consume_ascii_chars(2);
1322 } else {
1323 break;
1324 }
1325 } else {
1326 break;
1327 }
1328 }
1329 }
1330
1331 fn skip_comment(&mut self) {
1332 if self.reinject_buf.is_empty() {
1333 let end = self
1334 .cursor
1335 .find_byte(b'\n')
1336 .unwrap_or(self.cursor.rest().len());
1337 self.consume_source_bytes(end);
1338 return;
1339 }
1340
1341 while let Some(ch) = self.peek_char() {
1342 if ch == '\n' {
1343 break;
1344 }
1345 self.advance();
1346 }
1347 }
1348
1349 fn read_comment(&mut self) {
1350 debug_assert_eq!(self.peek_char(), Some('#'));
1351
1352 if self.reinject_buf.is_empty() {
1353 let rest = self.cursor.rest();
1354 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1355 self.consume_source_bytes(end);
1356 return;
1357 }
1358
1359 self.advance(); while let Some(ch) = self.peek_char() {
1362 if ch == '\n' {
1363 break;
1364 }
1365 self.advance();
1366 }
1367 }
1368
1369 fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1370 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1371 return false;
1372 }
1373
1374 let line_start = self.input[..self.offset]
1375 .rfind('\n')
1376 .map_or(0, |index| index + 1);
1377 let prefix = &self.input[line_start..self.offset];
1378 line_has_unclosed_double_paren(prefix)
1379 }
1380
    /// Lexes input beginning with an ASCII digit: either an fd-prefixed
    /// redirection operator (`2>`, `2>>`, `2>&1`, `0<&3`, `3<>`, …) or,
    /// when no redirection operator follows the digit, an ordinary word.
    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
            let Some(fd) = first_digit.to_digit(10) else {
                unreachable!("peeked ASCII digit should convert to a base-10 digit");
            };
            let fd = fd as i32;

            // Classify by the two characters following the digit; only a
            // single leading digit is treated as the fd number here.
            match (self.second_char(), self.third_char()) {
                (Some('>'), Some('>')) => {
                    // `N>>` append; `N>>|` is consumed as one operator too.
                    if self.fourth_char() == Some('|') {
                        self.consume_ascii_chars(4);
                    } else {
                        self.consume_ascii_chars(3);
                    }
                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
                }
                (Some('>'), Some('|')) => {
                    // `N>|` clobber.
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
                }
                (Some('>'), Some('&')) => {
                    // `N>&M` output duplication: collect the digit target.
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() {
                            target_str.push(c);
                            self.advance();
                        } else {
                            break;
                        }
                    }

                    // No digit target (e.g. `N>&word`): emit a plain fd
                    // redirect with `N>&` already consumed.
                    // NOTE(review): unlike the `<&` branch below, `-` is not
                    // recognized here, so `2>&-` does not yield a close
                    // token — confirm this is intentional.
                    if target_str.is_empty() {
                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                    }

                    // The parse can only fail on overflow of the digit run;
                    // fd 1 (stdout) is the fallback in that case.
                    let target_fd: i32 = target_str.parse().unwrap_or(1);
                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
                }
                (Some('>'), _) => {
                    // Plain `N>`.
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                }
                (Some('<'), Some('&')) => {
                    // `N<&M` input duplication; `N<&-` closes the fd.
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() || c == '-' {
                            target_str.push(c);
                            self.advance();
                            // `-` terminates the target immediately.
                            if c == '-' {
                                break;
                            }
                        } else {
                            break;
                        }
                    }

                    if target_str == "-" {
                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
                    }
                    // Overflow (or a trailing `-` after digits) falls back to
                    // fd 0 (stdin).
                    let target_fd: i32 = target_str.parse().unwrap_or(0);
                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
                }
                (Some('<'), Some('>')) => {
                    // `N<>` read-write.
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
                }
                // `N<<` opens a heredoc, not an fd redirect: fall through to
                // the word path below without consuming anything.
                (Some('<'), Some('<')) => {}
                (Some('<'), _) => {
                    // Plain `N<`.
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
                }
                _ => {}
            }
        }

        self.read_word()
    }
1465
1466 fn read_word_starting_with(
1467 &mut self,
1468 _prefix: &str,
1469 start: Position,
1470 ) -> Option<LexedToken<'a>> {
1471 let segment = match self.read_unquoted_segment(start) {
1472 Ok(segment) => segment,
1473 Err(kind) => return Some(LexedToken::error(kind)),
1474 };
1475 if segment.as_str().is_empty() {
1476 return None;
1477 }
1478 let mut lexed_word = LexedWord::from_segment(segment);
1479 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1480 return Some(LexedToken::error(kind));
1481 }
1482 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1483 }
1484
    /// Lexes a word token starting at the current position. A fast path
    /// borrows plain character runs directly from the source; anything with
    /// expansions, quotes, braces, or continuations falls through to
    /// `read_complex_word`.
    fn read_word(&mut self) -> Option<LexedToken<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            // Take a run of plain word characters. The bulk ASCII scan is
            // only used when the byte just past the run is also ASCII (or
            // absent); otherwise we rescan char-by-char.
            let ascii_len = self.source_ascii_plain_word_len();
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if !chunk.is_empty() {
                // Does the word extend past the plain run? (further word
                // chars, an expansion, a quote, a brace, a `\`-newline, or a
                // parenthesized suffix such as `name=(…)`.)
                let continues = matches!(
                    self.peek_char(),
                    Some(next)
                        if Self::is_word_char(next)
                            || next == '$'
                            || matches!(next, '\'' | '"')
                            || next == '{'
                            || (next == '\\' && self.second_char() == Some('\n'))
                            || (next == '('
                                && (chunk.ends_with('=')
                                    || Self::word_can_take_parenthesized_suffix(chunk)))
                );

                if !continues {
                    // Entire word was plain: borrow it straight from input.
                    let end = self.current_position();
                    return Some(LexedToken::borrowed_word(
                        TokenKind::Word,
                        &self.input[start.offset..self.offset],
                        Some(Span::from_positions(start, end)),
                    ));
                }

                if self.peek_char() == Some('(')
                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
                {
                    // e.g. `arr=(…)`: re-lex from `start` as a complex word.
                    return self.read_complex_word(start);
                }

                // Plain prefix followed by more word material: keep the
                // borrowed prefix as the first segment and append the rest.
                let end = self.current_position();
                return self.finish_segmented_word(LexedWord::borrowed(
                    LexedWordSegmentKind::Plain,
                    &self.input[start.offset..self.offset],
                    Some(Span::from_positions(start, end)),
                ));
            }
        }

        self.read_complex_word(start)
    }
1545
1546 fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1547 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1548 return Some(LexedToken::error(kind));
1549 }
1550
1551 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1552 }
1553
1554 fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1555 if self.peek_char() == Some('$') {
1556 match self.second_char() {
1557 Some('\'') => return self.read_dollar_single_quoted_string(),
1558 Some('"') => return self.read_dollar_double_quoted_string(),
1559 _ => {}
1560 }
1561 }
1562
1563 let segment = match self.read_unquoted_segment(start) {
1564 Ok(segment) => segment,
1565 Err(kind) => return Some(LexedToken::error(kind)),
1566 };
1567
1568 if segment.as_str().is_empty() {
1569 return None;
1570 }
1571
1572 self.finish_segmented_word(LexedWord::from_segment(segment))
1573 }
1574
    /// Reads one unquoted word segment starting at `start`, stopping at a
    /// quote or any character that cannot belong to the segment. Expansion
    /// syntax (`$name`, `$[…]`, `$((…))`, `$(…)`, `${…}`, backticks,
    /// extglobs, `name=(…)` values) is consumed verbatim into the segment.
    ///
    /// Capture strategy: while lexing straight from the source, `word` stays
    /// `None` and the result borrows from `self.input`; an owned buffer is
    /// materialized only when reinjected characters are pending or when
    /// `ensure_capture_from_source` is called because the captured text
    /// diverges from the raw source (escapes, backticks).
    fn read_unquoted_segment(
        &mut self,
        start: Position,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        while let Some(ch) = self.peek_char() {
            if ch == '"' || ch == '\'' {
                // A quote always terminates the unquoted segment.
                break;
            } else if ch == '$' {
                // `$'…'`/`$"…"` start a new segment — unless we are still at
                // the very beginning of this one (nothing captured yet).
                if matches!(self.second_char(), Some('\'') | Some('"'))
                    && (self.current_position().offset > start.offset
                        || word.as_ref().is_some_and(|word| !word.is_empty()))
                {
                    break;
                }

                self.advance();

                Self::push_capture_char(&mut word, ch);
                if self.peek_char() == Some('[') {
                    // Legacy `$[…]` arithmetic.
                    Self::push_capture_char(&mut word, '[');
                    self.advance();
                    if !self.read_legacy_arithmetic_into(&mut word, start) {
                        return Err(LexerErrorKind::CommandSubstitution);
                    }
                } else if self.peek_char() == Some('(') {
                    if self.second_char() == Some('(') {
                        // `$((…))` arithmetic expansion.
                        if !self.read_arithmetic_expansion_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    } else {
                        // `$(…)` command substitution.
                        Self::push_capture_char(&mut word, '(');
                        self.advance();
                        if !self.read_command_subst_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    }
                } else if self.peek_char() == Some('{') {
                    // `${…}` parameter expansion; its error result is
                    // deliberately ignored here.
                    Self::push_capture_char(&mut word, '{');
                    self.advance();
                    let _ = self.read_param_expansion_into(&mut word, start);
                } else {
                    // Bare parameter: either one special char ($?, $#, $@,
                    // $*, $!, $$, $-, or a single digit) or an
                    // alphanumeric/underscore name.
                    if let Some(c) = self.peek_char() {
                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
                            || c.is_ascii_digit()
                        {
                            Self::push_capture_char(&mut word, c);
                            self.advance();
                        } else {
                            while let Some(c) = self.peek_char() {
                                if c.is_ascii_alphanumeric() || c == '_' {
                                    Self::push_capture_char(&mut word, c);
                                    self.advance();
                                } else {
                                    break;
                                }
                            }
                        }
                    }
                }
            } else if ch == '{' {
                if self.looks_like_mid_word_brace_segment() {
                    // Mid-word brace expansion: consume its body wholesale.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                    self.consume_mid_word_brace_segment(&mut word);
                } else {
                    // Literal `{` inside a word.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else if ch == '`' {
                // Backtick substitution: copying is unavoidable, so force
                // the owned capture to catch up with the source first.
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut closed = false;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    if c == '`' {
                        closed = true;
                        break;
                    }
                    // A backslash protects the following char (e.g. \`).
                    if c == '\\'
                        && let Some(next) = self.peek_char()
                    {
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                    }
                }
                if !closed {
                    return Err(LexerErrorKind::BacktickSubstitution);
                }
            } else if ch == '\\' {
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                self.advance();
                if let Some(next) = self.peek_char() {
                    if next == '\n' {
                        // Line continuation disappears from the word.
                        self.advance();
                    } else {
                        // A NUL sentinel is recorded before the escaped char
                        // (presumably stripped during later cooking — see
                        // `TokenFlags::COOKED_TEXT`).
                        Self::push_capture_char(&mut word, '\x00');
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                        if next == '{'
                            && self.current_word_surface_is_single_char(start, &word, '{')
                            && self.escaped_brace_sequence_looks_like_brace_expansion()
                        {
                            // `\{…}` at word start that still reads like a
                            // brace expansion: swallow the balanced braces.
                            let mut depth = 1;
                            while let Some(c) = self.peek_char() {
                                Self::push_capture_char(&mut word, c);
                                self.advance();
                                match c {
                                    '{' => depth += 1,
                                    '}' => {
                                        depth -= 1;
                                        if depth == 0 {
                                            break;
                                        }
                                    }
                                    _ => {}
                                }
                            }
                        }
                    }
                } else {
                    // Trailing backslash at end of input stays literal.
                    Self::push_capture_char(&mut word, '\\');
                }
            } else if ch == '('
                && self.current_word_surface_ends_with_char(start, &word, '=')
                && self.looks_like_assoc_assign()
            {
                // `name=(…)` assignment value: consume balanced parens,
                // skipping over double/single quoted runs and escapes.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '"' => {
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '"' {
                                    break;
                                }
                                if qc == '\\'
                                    && let Some(esc) = self.peek_char()
                                {
                                    Self::push_capture_char(&mut word, esc);
                                    self.advance();
                                }
                            }
                        }
                        '\'' => {
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '\'' {
                                    break;
                                }
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
            {
                // Extglob (`?(…)`, `*(…)`, `+(…)`, `@(…)`, `!(…)`): consume
                // balanced parens with backslash handling.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if Self::is_plain_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Bulk-consume a plain run from the source (same ASCII
                    // fast path as `read_word`).
                    let ascii_len = self.source_ascii_plain_word_len();
                    let chunk = if ascii_len > 0
                        && self
                            .cursor
                            .rest()
                            .as_bytes()
                            .get(ascii_len)
                            .is_none_or(|byte| byte.is_ascii())
                    {
                        self.consume_source_bytes(ascii_len);
                        &self.input[self.offset - ascii_len..self.offset]
                    } else {
                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                        self.advance_scanned_source_bytes(chunk.len());
                        chunk
                    };
                    Self::push_capture_str(&mut word, chunk);
                } else {
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else {
                break;
            }
        }

        if let Some(word) = word {
            // Owned capture was materialized somewhere above.
            let span = Some(Span::from_positions(start, self.current_position()));
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::Plain,
                word,
                span,
                span,
            ))
        } else {
            // Pure source scan: the segment borrows directly from input.
            let end = self.current_position();
            Ok(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ))
        }
    }
1845
1846 fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1847 let segment = match self.read_single_quoted_segment() {
1848 Ok(segment) => segment,
1849 Err(kind) => return Some(LexedToken::error(kind)),
1850 };
1851 let mut word = LexedWord::from_segment(segment);
1852 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1853 return Some(LexedToken::error(kind));
1854 }
1855
1856 Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1857 }
1858
    /// Reads a `'…'` segment. With no reinjected input and rc_quotes off,
    /// the content is borrowed from the source via a `memchr` fast path;
    /// otherwise it is copied into an owned buffer (rc_quotes rewrites `''`
    /// to a literal quote, so borrowing is impossible then).
    ///
    /// Errors with `LexerErrorKind::SingleQuote` if the closing quote is
    /// missing.
    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(1);
        let content_start = self.current_position();
        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
        let mut content_end = content_start;
        let mut content = String::with_capacity(16);
        let mut closed = false;

        if can_borrow {
            // Fast path: jump straight to the closing quote.
            let rest = self.cursor.rest();
            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
                self.consume_source_bytes(quote_index);
                content_end = self.current_position();
                self.consume_ascii_chars(1);
                closed = true;
            } else {
                // No closing quote in the source; consume it all so the
                // error below reports from end of input.
                self.consume_source_bytes(rest.len());
            }
        }

        // Slow path (a no-op after a successful fast path, thanks to the
        // `closed` check).
        while let Some(ch) = self.peek_char() {
            if closed {
                break;
            }
            if ch == '\'' {
                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
                    // rc_quotes: `''` inside single quotes is a literal `'`.
                    if !can_borrow {
                        content.push('\'');
                    }
                    self.advance();
                    self.advance();
                    continue;
                }
                content_end = self.current_position();
                self.consume_ascii_chars(1);
                closed = true;
                break;
            }
            if !can_borrow {
                content.push(ch);
            }
            self.advance();
        }

        if !closed {
            return Err(LexerErrorKind::SingleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if can_borrow {
            Ok(LexedWordSegment::borrowed_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                content,
                content_span,
                wrapper_span,
            ))
        }
    }
1929
1930 fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1931 let segment = match self.read_dollar_single_quoted_segment() {
1932 Ok(segment) => segment,
1933 Err(kind) => return Some(LexedToken::error(kind)),
1934 };
1935 let mut word = LexedWord::from_segment(segment);
1936 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1937 return Some(LexedToken::error(kind));
1938 }
1939
1940 let kind = if word.single_segment().is_some() {
1941 TokenKind::LiteralWord
1942 } else {
1943 TokenKind::Word
1944 };
1945
1946 Some(LexedToken::with_word_payload(kind, word))
1947 }
1948
    /// Reads a `$'…'` ANSI-C quoted segment, decoding escape sequences
    /// (`\n`, `\t`, `\xHH`, `\uHHHH`, `\UHHHHHHHH`, octal, `\cX`, …) into
    /// an owned output string as it goes.
    ///
    /// Errors with `LexerErrorKind::SingleQuote` if the closing quote is
    /// missing.
    fn read_dollar_single_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('$'));
        debug_assert_eq!(self.second_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(2);
        let content_start = self.current_position();
        let mut out = String::with_capacity(16);

        while let Some(ch) = self.peek_char() {
            if ch == '\'' {
                // Closing quote: build the spans and return decoded text.
                let content_end = self.current_position();
                self.advance();
                let wrapper_span =
                    Some(Span::from_positions(wrapper_start, self.current_position()));
                let content_span = Some(Span::from_positions(content_start, content_end));
                return Ok(LexedWordSegment::owned_with_spans(
                    LexedWordSegmentKind::DollarSingleQuoted,
                    out,
                    content_span,
                    wrapper_span,
                ));
            }

            if ch == '\\' {
                self.advance();
                if let Some(esc) = self.peek_char() {
                    self.advance();
                    match esc {
                        // Single-character escapes.
                        'n' => out.push('\n'),
                        't' => out.push('\t'),
                        'r' => out.push('\r'),
                        'a' => out.push('\x07'),
                        'b' => out.push('\x08'),
                        'f' => out.push('\x0C'),
                        'v' => out.push('\x0B'),
                        'e' | 'E' => out.push('\x1B'),
                        '\\' => out.push('\\'),
                        '\'' => out.push('\''),
                        '"' => out.push('"'),
                        '?' => out.push('?'),
                        'c' => {
                            // `\cX` control char; a dangling `\c` at end of
                            // input is kept literally.
                            if let Some(control) = self.peek_char() {
                                self.advance();
                                out.push(((control as u32 & 0x1F) as u8) as char);
                            } else {
                                out.push('\\');
                                out.push('c');
                            }
                        }
                        'x' => {
                            // `\xHH`: up to two hex digits; silently dropped
                            // when no digits follow (parse of "" fails).
                            // NOTE(review): values >= 0x80 become the Unicode
                            // code point U+00HH rather than a raw byte —
                            // confirm this is the intended semantics.
                            let mut hex = String::new();
                            for _ in 0..2 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
                                out.push(val as char);
                            }
                        }
                        'u' => {
                            // `\uHHHH`: up to four hex digits; invalid code
                            // points are silently dropped.
                            let mut hex = String::new();
                            for _ in 0..4 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            if let Ok(val) = u32::from_str_radix(&hex, 16)
                                && let Some(c) = char::from_u32(val)
                            {
                                out.push(c);
                            }
                        }
                        'U' => {
                            // `\UHHHHHHHH`: up to eight hex digits; invalid
                            // code points are silently dropped.
                            let mut hex = String::new();
                            for _ in 0..8 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            if let Ok(val) = u32::from_str_radix(&hex, 16)
                                && let Some(c) = char::from_u32(val)
                            {
                                out.push(c);
                            }
                        }
                        '0'..='7' => {
                            // Octal escape: the digit already consumed plus
                            // up to two more octal digits.
                            let mut oct = String::new();
                            oct.push(esc);
                            for _ in 0..2 {
                                if let Some(o) = self.peek_char() {
                                    if o.is_ascii_digit() && o < '8' {
                                        oct.push(o);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
                                out.push(val as char);
                            }
                        }
                        _ => {
                            // Unknown escape: keep backslash and char as-is.
                            out.push('\\');
                            out.push(esc);
                        }
                    }
                } else {
                    // Trailing backslash at end of input stays literal.
                    out.push('\\');
                }
                continue;
            }

            out.push(ch);
            self.advance();
        }

        // Ran out of input before the closing quote.
        Err(LexerErrorKind::SingleQuote)
    }
2087
2088 fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2089 let start = self.current_position();
2090
2091 if self.reinject_buf.is_empty() {
2092 let ascii_len = self.source_ascii_plain_word_len();
2093 let chunk = if ascii_len > 0
2094 && self
2095 .cursor
2096 .rest()
2097 .as_bytes()
2098 .get(ascii_len)
2099 .is_none_or(|byte| byte.is_ascii())
2100 {
2101 self.consume_source_bytes(ascii_len);
2102 &self.input[start.offset..self.offset]
2103 } else {
2104 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2105 self.advance_scanned_source_bytes(chunk.len());
2106 chunk
2107 };
2108 if chunk.is_empty() {
2109 return None;
2110 }
2111
2112 let end = self.current_position();
2113 return Some(LexedWordSegment::borrowed(
2114 LexedWordSegmentKind::Plain,
2115 &self.input[start.offset..self.offset],
2116 Some(Span::from_positions(start, end)),
2117 ));
2118 }
2119
2120 let ch = self.peek_char()?;
2121 if !Self::is_plain_word_char(ch) {
2122 return None;
2123 }
2124
2125 let mut text = String::with_capacity(16);
2126 while let Some(ch) = self.peek_char() {
2127 if !Self::is_plain_word_char(ch) {
2128 break;
2129 }
2130 text.push(ch);
2131 self.advance();
2132 }
2133
2134 Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2135 }
2136
2137 fn append_segmented_continuation(
2140 &mut self,
2141 word: &mut LexedWord<'a>,
2142 ) -> Result<(), LexerErrorKind> {
2143 loop {
2144 match self.peek_char() {
2145 Some('\\') if self.second_char() == Some('\n') => {
2146 self.advance();
2147 self.advance();
2148 continue;
2149 }
2150 Some('\'') => {
2151 word.push_segment(self.read_single_quoted_segment()?);
2152 }
2153 Some('"') => {
2154 word.push_segment(self.read_double_quoted_segment()?);
2155 }
2156 Some('$') if self.second_char() == Some('\'') => {
2157 word.push_segment(self.read_dollar_single_quoted_segment()?);
2158 }
2159 Some('$') if self.second_char() == Some('"') => {
2160 word.push_segment(self.read_dollar_double_quoted_segment()?);
2161 }
2162 Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2163 let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2164 unreachable!("peeked '(' should produce a suffix segment");
2165 };
2166 word.push_segment(segment);
2167 }
2168 _ => {
2169 if let Some(segment) = self.read_plain_continuation_segment() {
2170 word.push_segment(segment);
2171 continue;
2172 }
2173
2174 let start = self.current_position();
2175 let plain = self.read_unquoted_segment(start)?;
2176 if plain.as_str().is_empty() {
2177 break;
2178 }
2179 word.push_segment(plain);
2180 }
2181 }
2182 }
2183
2184 Ok(())
2185 }
2186
2187 fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2188 debug_assert_eq!(self.peek_char(), Some('('));
2189
2190 let start = self.current_position();
2191 let mut depth = 0usize;
2192 let mut escaped = false;
2193 let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2194
2195 while let Some(ch) = self.peek_char() {
2196 if let Some(text) = text.as_mut() {
2197 text.push(ch);
2198 }
2199 self.advance();
2200
2201 if escaped {
2202 escaped = false;
2203 continue;
2204 }
2205
2206 match ch {
2207 '\\' => escaped = true,
2208 '(' => depth += 1,
2209 ')' => {
2210 depth = depth.saturating_sub(1);
2211 if depth == 0 {
2212 break;
2213 }
2214 }
2215 _ => {}
2216 }
2217 }
2218
2219 let end = self.current_position();
2220 let span = Some(Span::from_positions(start, end));
2221 if let Some(text) = text {
2222 Some(LexedWordSegment::owned_with_spans(
2223 LexedWordSegmentKind::Plain,
2224 text,
2225 span,
2226 span,
2227 ))
2228 } else {
2229 Some(LexedWordSegment::borrowed_with_spans(
2230 LexedWordSegmentKind::Plain,
2231 &self.input[start.offset..end.offset],
2232 span,
2233 span,
2234 ))
2235 }
2236 }
2237
2238 fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2239 self.read_double_quoted_word(false)
2240 }
2241
2242 fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2243 self.read_double_quoted_word(true)
2244 }
2245
2246 fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2247 let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2248 Ok(segment) => segment,
2249 Err(kind) => return Some(LexedToken::error(kind)),
2250 };
2251 let mut word = LexedWord::from_segment(segment);
2252 if let Err(kind) = self.append_segmented_continuation(&mut word) {
2253 return Some(LexedToken::error(kind));
2254 }
2255
2256 let kind = if word.single_segment().is_some() {
2257 TokenKind::QuotedWord
2258 } else {
2259 TokenKind::Word
2260 };
2261
2262 Some(LexedToken::with_word_payload(kind, word))
2263 }
2264
2265 fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2266 self.read_double_quoted_segment_with_dollar(false)
2267 }
2268
2269 fn read_dollar_double_quoted_segment(
2270 &mut self,
2271 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2272 self.read_double_quoted_segment_with_dollar(true)
2273 }
2274
    /// Shared reader for `"…"` and `$"…"` segments. Starts in a "simple"
    /// mode that bulk-skips ordinary bytes via `find_double_quote_special`;
    /// the first `\`, `$`, or backtick drops to the char-wise mode below.
    /// Content is borrowed from the source until something forces a rewrite
    /// (escapes, backticks, some parameter expansions).
    ///
    /// Errors with `LexerErrorKind::DoubleQuote` on a missing close quote.
    fn read_double_quoted_segment_with_dollar(
        &mut self,
        dollar: bool,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        if dollar {
            debug_assert_eq!(self.peek_char(), Some('$'));
            debug_assert_eq!(self.second_char(), Some('"'));
        } else {
            debug_assert_eq!(self.peek_char(), Some('"'));
        }

        let wrapper_start = self.current_position();
        if dollar {
            self.consume_ascii_chars(2);
        } else {
            self.consume_ascii_chars(1);
        }
        let content_start = self.current_position();
        let mut content_end = content_start;
        // Both the fast mode and borrowing require scanning the source
        // directly (no reinjected characters).
        let mut simple = self.reinject_buf.is_empty();
        let mut borrowable = self.reinject_buf.is_empty();
        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        let mut closed = false;

        while let Some(ch) = self.peek_char() {
            if simple {
                if self.reinject_buf.is_empty() {
                    let rest = self.cursor.rest();
                    match Self::find_double_quote_special(rest) {
                        Some(index) if index > 0 => {
                            // Bulk-skip up to the next special byte.
                            self.consume_source_bytes(index);
                            continue;
                        }
                        None => {
                            // No special byte (hence no closing quote) left.
                            self.consume_source_bytes(rest.len());
                            return Err(LexerErrorKind::DoubleQuote);
                        }
                        _ => {}
                    }
                }

                match ch {
                    '"' => {
                        content_end = self.current_position();
                        self.consume_ascii_chars(1);
                        closed = true;
                        break;
                    }
                    '\\' | '$' | '`' => {
                        // Leave simple mode; a backtick also forces an owned
                        // copy of everything captured so far.
                        simple = false;
                        if ch == '`' {
                            borrowable = false;
                            let capture_end = self.current_position();
                            self.ensure_capture_from_source(
                                &mut content,
                                content_start,
                                capture_end,
                            );
                        }
                    }
                    _ => {
                        self.advance();
                    }
                }
                if simple {
                    continue;
                }
            }

            match ch {
                '"' => {
                    if borrowable {
                        content_end = self.current_position();
                    }
                    self.consume_ascii_chars(1);
                    closed = true;
                    break;
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        match next {
                            '\n' => {
                                // Line continuation: dropped from content.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                self.advance();
                            }
                            '$' => {
                                // NUL sentinel marks the escaped `$` —
                                // presumably consumed by later cooking.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                Self::push_capture_char(&mut content, '\x00');
                                Self::push_capture_char(&mut content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // Escaped quote/backslash/backtick: keep the
                                // char, with a NUL sentinel for `\` and
                                // backtick (but not for `"`).
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                if next == '\\' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                if next == '`' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                            _ => {
                                // Any other escape stays literal.
                                Self::push_capture_char(&mut content, '\\');
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                        }
                    }
                }
                '$' => {
                    Self::push_capture_char(&mut content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            // `$((…))` inside the quotes.
                            self.read_arithmetic_expansion_into(&mut content);
                        } else {
                            // `$(…)` command substitution inside the quotes.
                            Self::push_capture_char(&mut content, '(');
                            self.advance();
                            self.read_command_subst_into(&mut content);
                        }
                    } else if self.peek_char() == Some('{') {
                        // `${…}`; the helper reports whether borrowing from
                        // the source is still possible afterwards.
                        Self::push_capture_char(&mut content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
                    }
                    content_end = self.current_position();
                }
                '`' => {
                    // Backtick substitution: copy verbatim through the
                    // closing backtick, honouring backslash escapes.
                    borrowable = false;
                    let capture_end = self.current_position();
                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
                    Self::push_capture_char(&mut content, '`');
                    self.advance();
                    while let Some(c) = self.peek_char() {
                        Self::push_capture_char(&mut content, c);
                        self.advance();
                        if c == '`' {
                            break;
                        }
                        if c == '\\'
                            && let Some(next) = self.peek_char()
                        {
                            Self::push_capture_char(&mut content, next);
                            self.advance();
                        }
                    }
                    content_end = self.current_position();
                }
                _ => {
                    Self::push_capture_char(&mut content, ch);
                    self.advance();
                    content_end = self.current_position();
                }
            }
        }

        if !closed {
            return Err(LexerErrorKind::DoubleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if borrowable {
            Ok(LexedWordSegment::borrowed_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                content.unwrap_or_default(),
                content_span,
                wrapper_span,
            ))
        }
    }
2482
    /// Consumes a `$((…))` arithmetic expansion body after the `$` has been
    /// eaten, copying everything (including both opening parens) verbatim
    /// into `content`. Quoted runs, backticks, and backslash escapes are
    /// skipped over so their parentheses don't affect the depth count.
    /// Returns `false` when input ends before the parens balance.
    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
        debug_assert_eq!(self.peek_char(), Some('('));
        debug_assert_eq!(self.second_char(), Some('('));

        Self::push_capture_char(content, '(');
        self.advance();
        Self::push_capture_char(content, '(');
        self.advance();

        // Depth starts at 2 for the two opening parens; each `)` decrements
        // by one, so `))` closes the expansion.
        let mut depth = 2;
        while let Some(c) = self.peek_char() {
            match c {
                '\\' => {
                    // Backslash protects the following char.
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                '\'' => {
                    // Single-quoted run: copy through the closing quote.
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                '"' => {
                    // Double-quoted run with backslash escapes.
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                '`' => {
                    // Backtick run with backslash escapes.
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '(' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth += 1;
                }
                ')' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth -= 1;
                    if depth == 0 {
                        return true;
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // Input exhausted with unbalanced parentheses.
        false
    }
2572
    /// Captures the body of a legacy `$[ ... ]` arithmetic expansion into
    /// `content`, starting just after the opening `[` (bracket depth 1).
    ///
    /// Brackets are balanced while skipping escapes and quoted runs, and
    /// nested expansions (`$((`, `$(`, `${`, `$[`) are delegated to their
    /// dedicated readers so their delimiters don't disturb the depth count.
    ///
    /// Returns `true` once the matching `]` is consumed, `false` on EOF or
    /// when a nested reader reports failure.
    fn read_legacy_arithmetic_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        // The caller has already consumed `$[`.
        let mut bracket_depth = 1;

        while let Some(c) = self.peek_char() {
            match c {
                // Backslash escape: copy the escaped char blindly.
                '\\' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                // Single quotes: verbatim copy through the closing quote.
                '\'' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                // Double quotes: escape-aware copy through the closing quote.
                '"' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                // Backticks: escape-aware copy through the closing backtick.
                '`' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '[' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth += 1;
                }
                ']' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth -= 1;
                    if bracket_depth == 0 {
                        return true;
                    }
                }
                // `$` may start a nested expansion; hand off to the matching
                // reader so its internal `]`/`[` don't affect bracket_depth.
                '$' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                return false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            if !self.read_command_subst_into(content) {
                                return false;
                            }
                        }
                    } else if self.peek_char() == Some('{') {
                        // NOTE(review): read_param_expansion_into returns its
                        // "borrowable" flag, not a closed/unterminated flag —
                        // treating `false` here as failure may abort on merely
                        // non-borrowable (escape-rewritten) expansions; confirm
                        // the intended contract.
                        if !self.read_param_expansion_into(content, segment_start) {
                            return false;
                        }
                    } else if self.peek_char() == Some('[') {
                        Self::push_capture_char(content, '[');
                        self.advance();
                        if !self.read_legacy_arithmetic_into(content, segment_start) {
                            return false;
                        }
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the matching `]`.
        false
    }
2688
    /// Captures a `$( ... )` command substitution body into `content`,
    /// starting the nesting-limit bookkeeping at depth 0.
    ///
    /// Returns `true` when the matching `)` was consumed, `false` on EOF.
    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
        self.read_command_subst_into_depth(content, 0)
    }
2695
2696 fn flush_command_subst_keyword(
2697 current_word: &mut String,
2698 pending_case_headers: &mut usize,
2699 case_clause_depths: &mut SmallVec<[usize; 4]>,
2700 depth: usize,
2701 word_started_at_command_start: &mut bool,
2702 ) {
2703 if current_word.is_empty() {
2704 *word_started_at_command_start = false;
2705 return;
2706 }
2707
2708 match current_word.as_str() {
2709 "case" if *word_started_at_command_start => *pending_case_headers += 1,
2710 "in" if *pending_case_headers > 0 => {
2711 *pending_case_headers -= 1;
2712 case_clause_depths.push(depth);
2713 }
2714 "esac" if *word_started_at_command_start => {
2715 case_clause_depths.pop();
2716 }
2717 _ => {}
2718 }
2719
2720 current_word.clear();
2721 *word_started_at_command_start = false;
2722 }
2723
    /// Reads a heredoc delimiter word (following `<<`/`<<-`) inside a command
    /// substitution.
    ///
    /// The raw text (including quotes and escapes) is echoed into `content`,
    /// while the returned value is the "cooked" delimiter with quotes and
    /// escape backslashes removed — the form later matched against heredoc
    /// body lines. Returns `None` when no delimiter character was present.
    fn read_command_subst_heredoc_delimiter_into(
        &mut self,
        content: &mut Option<String>,
    ) -> Option<String> {
        // Skip (but still capture) horizontal whitespace before the word.
        while let Some(ch) = self.peek_char() {
            if !matches!(ch, ' ' | '\t') {
                break;
            }
            Self::push_capture_char(content, ch);
            self.advance();
        }

        let mut cooked = String::new();
        let mut in_single = false;
        let mut in_double = false;
        let mut escaped = false;
        let mut saw_any = false;

        while let Some(ch) = self.peek_char() {
            // Quote/escape state decides whether this char ends the word.
            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
                break;
            }

            saw_any = true;
            Self::push_capture_char(content, ch);
            self.advance();

            // An escaped char always joins the cooked delimiter literally.
            if escaped {
                cooked.push(ch);
                escaped = false;
                continue;
            }

            match ch {
                // Quote and escape characters shape the state but are
                // excluded from the cooked delimiter text.
                '\\' if !in_single => escaped = true,
                '\'' if !in_double => in_single = !in_single,
                '"' if !in_single => in_double = !in_double,
                _ => cooked.push(ch),
            }
        }

        saw_any.then_some(cooked)
    }
2767
2768 fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2769 Self::push_capture_char(content, '`');
2770 self.advance();
2771 while let Some(ch) = self.peek_char() {
2772 Self::push_capture_char(content, ch);
2773 self.advance();
2774 if ch == '\\' {
2775 if let Some(esc) = self.peek_char() {
2776 Self::push_capture_char(content, esc);
2777 self.advance();
2778 }
2779 continue;
2780 }
2781 if ch == '`' {
2782 break;
2783 }
2784 }
2785 }
2786
2787 fn read_command_subst_pending_heredoc_into(
2788 &mut self,
2789 content: &mut Option<String>,
2790 delimiter: &str,
2791 strip_tabs: bool,
2792 ) -> bool {
2793 loop {
2794 let mut line = String::new();
2795 let mut saw_newline = false;
2796
2797 while let Some(ch) = self.peek_char() {
2798 self.advance();
2799 if ch == '\n' {
2800 saw_newline = true;
2801 break;
2802 }
2803 line.push(ch);
2804 }
2805
2806 Self::push_capture_str(content, &line);
2807 if saw_newline {
2808 Self::push_capture_char(content, '\n');
2809 }
2810
2811 if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2812 return true;
2813 }
2814 }
2815 }
2816
    /// Core scanner for `$( ... )` command substitution bodies.
    ///
    /// Captures the full raw text into `content` while balancing parentheses.
    /// To keep `)` matching correct it must understand enough shell syntax:
    /// comments, all three quoting forms, backticks, `$'...'`, escapes,
    /// heredocs (whose bodies may contain unbalanced parens), and `case`
    /// clauses (whose pattern-terminating `)` must not close the
    /// substitution). `subst_depth` guards against pathological nesting:
    /// past `max_subst_depth` the body is consumed with naive paren counting.
    ///
    /// Returns `true` once the matching `)` is consumed, `false` on EOF.
    fn read_command_subst_into_depth(
        &mut self,
        content: &mut Option<String>,
        subst_depth: usize,
    ) -> bool {
        if subst_depth >= self.max_subst_depth {
            // Nesting budget exhausted: fall back to raw paren balancing
            // with no quote/heredoc awareness.
            let mut depth = 1;
            while let Some(c) = self.peek_char() {
                self.advance();
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            Self::push_capture_char(content, ')');
                            return true;
                        }
                    }
                    _ => {}
                }
            }
            return false;
        }

        let mut depth = 1;
        // Heredocs announced on the current line; bodies are consumed when
        // the newline is reached. (delimiter, strip_tabs) pairs.
        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
        // `case` keywords seen whose `in` has not yet arrived.
        let mut pending_case_headers = 0usize;
        // Paren depths at which a case clause list is open — a `)` at such a
        // depth ends a pattern, not the substitution.
        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
        // Accumulates the identifier-like word being scanned, for keyword
        // detection via flush_command_subst_keyword.
        let mut current_word = String::with_capacity(16);
        let mut at_command_start = true;
        let mut expecting_redirection_target = false;
        let mut current_word_started_at_command_start = false;
        while let Some(c) = self.peek_char() {
            match c {
                // Comment: copy through end of line, then service any
                // heredocs that were announced earlier on the line.
                '#' if !self.should_treat_hash_as_word_char() => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '#');
                    self.advance();
                    while let Some(comment_ch) = self.peek_char() {
                        Self::push_capture_char(content, comment_ch);
                        self.advance();
                        if comment_ch == '\n' {
                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                                if !self.read_command_subst_pending_heredoc_into(
                                    content, &delimiter, strip_tabs,
                                ) {
                                    return false;
                                }
                            }
                            at_command_start = true;
                            expecting_redirection_target = false;
                            break;
                        }
                    }
                }
                '(' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    depth += 1;
                    Self::push_capture_char(content, c);
                    self.advance();
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                ')' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    // Inside an open case clause at this depth, `)` ends a
                    // pattern — don't let it close the substitution.
                    if case_clause_depths
                        .last()
                        .is_some_and(|case_depth| *case_depth == depth)
                    {
                        Self::push_capture_char(content, ')');
                        self.advance();
                        at_command_start = true;
                        expecting_redirection_target = false;
                        continue;
                    }
                    depth -= 1;
                    self.advance();
                    if depth == 0 {
                        Self::push_capture_char(content, ')');
                        return true;
                    }
                    Self::push_capture_char(content, c);
                    at_command_start = false;
                    expecting_redirection_target = false;
                }
                // Double quotes: copy through the closing quote, recursing
                // into `$((...))` / `$(...)` found inside.
                '"' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '"');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        match qc {
                            '"' => {
                                Self::push_capture_char(content, '"');
                                self.advance();
                                break;
                            }
                            '\\' => {
                                Self::push_capture_char(content, '\\');
                                self.advance();
                                if let Some(esc) = self.peek_char() {
                                    Self::push_capture_char(content, esc);
                                    self.advance();
                                }
                            }
                            '$' => {
                                Self::push_capture_char(content, '$');
                                self.advance();
                                if self.peek_char() == Some('(') {
                                    if self.second_char() == Some('(') {
                                        if !self.read_arithmetic_expansion_into(content) {
                                            return false;
                                        }
                                    } else {
                                        Self::push_capture_char(content, '(');
                                        self.advance();
                                        if !self
                                            .read_command_subst_into_depth(content, subst_depth + 1)
                                        {
                                            return false;
                                        }
                                    }
                                }
                            }
                            _ => {
                                Self::push_capture_char(content, qc);
                                self.advance();
                            }
                        }
                    }
                    // A quoted word satisfies a pending redirection target;
                    // otherwise it occupies the command position.
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Single quotes: verbatim copy through the closing quote.
                '\'' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backtick substitution: captured as one opaque segment.
                '`' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    self.read_command_subst_backtick_segment_into(content);
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // ANSI-C quoting `$'...'`: escape-aware copy.
                '$' if self.second_char() == Some('\'') => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '$');
                    self.advance();
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\\' {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            continue;
                        }
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Bare escape: copy the pair.
                '\\' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\\');
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        Self::push_capture_char(content, esc);
                        self.advance();
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `<<`: heredoc (or `<<<` here-string) operator.
                '<' if self.second_char() == Some('<') => {
                    // A purely numeric word right before the operator is an
                    // fd prefix (e.g. `3<<EOF`), not a command word.
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }

                    Self::push_capture_char(content, '<');
                    self.advance();
                    Self::push_capture_char(content, '<');
                    self.advance();

                    // `<<<` is a here-string: an ordinary redirection target
                    // follows, no heredoc body.
                    if self.peek_char() == Some('<') {
                        Self::push_capture_char(content, '<');
                        self.advance();
                        expecting_redirection_target = true;
                        continue;
                    }

                    let strip_tabs = if self.peek_char() == Some('-') {
                        Self::push_capture_char(content, '-');
                        self.advance();
                        true
                    } else {
                        false
                    };

                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
                    {
                        // Body is consumed when the line's newline arrives.
                        pending_heredocs.push((delimiter, strip_tabs));
                        expecting_redirection_target = false;
                    } else {
                        expecting_redirection_target = true;
                    }
                }
                // Plain redirection: the next word is its target.
                '>' | '<' => {
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                    expecting_redirection_target = true;
                }
                // End of line: drain announced heredoc bodies, then a new
                // command begins.
                '\n' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    Self::push_capture_char(content, '\n');
                    self.advance();
                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                        if !self.read_command_subst_pending_heredoc_into(
                            content, &delimiter, strip_tabs,
                        ) {
                            return false;
                        }
                    }
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                _ => {
                    if c.is_ascii_alphanumeric() || c == '_' {
                        // Extend the current identifier-like word; remember
                        // whether it began in command position (keyword slot).
                        if current_word.is_empty()
                            && !expecting_redirection_target
                            && at_command_start
                        {
                            current_word_started_at_command_start = true;
                            at_command_start = false;
                        }
                        current_word.push(c);
                    } else {
                        let had_word = !current_word.is_empty();
                        Self::flush_command_subst_keyword(
                            &mut current_word,
                            &mut pending_case_headers,
                            &mut case_clause_depths,
                            depth,
                            &mut current_word_started_at_command_start,
                        );
                        if had_word && expecting_redirection_target {
                            expecting_redirection_target = false;
                        }
                        match c {
                            ' ' | '\t' => {}
                            // Command separators reset to command position.
                            ';' | '|' | '&' => {
                                at_command_start = true;
                                expecting_redirection_target = false;
                            }
                            _ => {
                                if !expecting_redirection_target {
                                    at_command_start = false;
                                }
                            }
                        }
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the matching `)`.
        false
    }
3217
    /// Captures a `${ ... }` parameter expansion body (starting just inside
    /// the opening brace) into `content`.
    ///
    /// Balances braces while tracking single/double quoting, distinguishing
    /// literal `{`/`}` pairs from expansion delimiters, and recursing into
    /// nested `$((`, `$(`, and `${` constructs. Certain escapes (`\$`, `\"`,
    /// `\\`, `` \` ``) are rewritten into their cooked form; the return value
    /// is the "borrowable" flag — `true` while the captured text is still
    /// byte-identical to the source slice, `false` once any rewrite happened.
    /// Note: reaching EOF without a closing `}` still returns the flag; the
    /// caller is responsible for unterminated-expansion handling.
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut borrowable = true;
        let mut depth = 1;
        // `{ ... }` pairs that are plain text inside the expansion.
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Expansion depth at which the current double-quote run opened;
        // `}` only closes levels opened inside the quotes.
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            if in_single {
                match c {
                    '\\' => {
                        let escape_start = self.current_position();
                        // `\"` inside single quotes is rewritten to a bare
                        // quote; this forces the owned (cooked) capture.
                        if self.second_char() == Some('"') {
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    // If a literal `{` is still open and another top-level
                    // closer exists further on, this `}` closes the literal
                    // pair rather than the expansion.
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                '{' if !in_single && !in_double => {
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    // Single quotes are literal inside double quotes.
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                // NOTE(review): the NUL byte appears to be an
                                // internal marker for an escaped `$` — confirm
                                // against the consumer of the cooked text.
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // Escape collapsed to the bare character.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // Escaped brace stays raw but cancels one
                                // literal-brace level.
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at EOF.
                        Self::push_capture_char(content, '\\');
                    }
                }
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3368
    /// Lookahead: starting at `target_depth` of `${...}` nesting, is there a
    /// later `}` that would close that same level?
    ///
    /// Used by `read_param_expansion_into` to decide whether a `}` should be
    /// treated as closing a literal brace pair instead of the expansion.
    /// Mirrors that function's quote handling: single quotes suppress
    /// everything except `\"`, double quotes only honor `}` for levels opened
    /// inside them, and `${` opens a new level. A newline while still at the
    /// target depth means no closer on this line. Does not consume input.
    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
        let mut chars = self.lookahead_chars().peekable();
        let mut depth = target_depth;
        let mut in_single = false;
        let mut in_double = false;
        // Depth at which the current double-quote run opened.
        let mut double_quote_depth = 0usize;

        while let Some(ch) = chars.next() {
            if in_single {
                match ch {
                    '\'' => in_single = false,
                    // `\"` is consumed as a pair (matches the reader's
                    // single-quote rewrite of `\"`).
                    '\\' if chars.peek() == Some(&'"') => {
                        chars.next();
                    }
                    '\\' => {}
                    _ => {}
                }
                continue;
            }

            if in_double {
                match ch {
                    '"' => {
                        in_double = false;
                        double_quote_depth = 0;
                    }
                    '\\' => {
                        chars.next();
                    }
                    '$' if chars.peek() == Some(&'{') => {
                        chars.next();
                        depth += 1;
                    }
                    // Only levels opened inside the quotes can close here.
                    '}' if depth > double_quote_depth => {
                        depth -= 1;
                    }
                    _ => {}
                }
                continue;
            }

            match ch {
                '\n' if depth == target_depth => return false,
                '\'' => in_single = true,
                '"' => {
                    in_double = true;
                    double_quote_depth = depth;
                }
                '\\' => {
                    chars.next();
                }
                '$' if chars.peek() == Some(&'{') => {
                    chars.next();
                    depth += 1;
                }
                '}' => {
                    if depth == target_depth {
                        return true;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        false
    }
3436
    /// Lookahead: does the `{` at the cursor begin a brace expansion
    /// (`{a,b}` or `{1..3}`) rather than a literal brace or brace group?
    ///
    /// Scans without consuming input, tracking quote state, nested braces,
    /// and `$( ... )` parens. Requires a top-level `,` or `..` before the
    /// matching `}`; whitespace or `;` at top level disqualifies. Bails out
    /// (returns `false`) after `MAX_LOOKAHEAD` characters to bound cost on
    /// pathological input.
    fn looks_like_brace_expansion(&self) -> bool {
        const MAX_LOOKAHEAD: usize = 10_000;

        let mut chars = self.lookahead_chars();

        if chars.next() != Some('{') {
            return false;
        }

        let mut depth = 1;
        let mut paren_depth = 0usize;
        let mut has_comma = false;
        let mut has_dot_dot = false;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;
        let mut scanned = 0usize;

        for ch in chars {
            scanned += 1;
            if scanned > MAX_LOOKAHEAD {
                return false;
            }

            // Outside all quoting forms, braces/parens are structural.
            let brace_surface_active = !in_single && !in_double && !in_backtick;
            // Top level = directly inside the candidate `{ }`, unquoted.
            let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;

            match ch {
                // Escaped char: consumed with no structural meaning.
                _ if escaped => {
                    escaped = false;
                }
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // Parens only count when already inside `$( ... )` or when
                // opened by a `$`.
                '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
                    paren_depth += 1
                }
                ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
                '{' if !in_single && !in_double && !in_backtick => depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    depth -= 1;
                    if depth == 0 {
                        // Closed: it's an expansion only if a separator
                        // (`,` or `..`) was seen at top level.
                        return has_comma || has_dot_dot;
                    }
                }
                ',' if at_top_level => has_comma = true,
                '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
                // Word breaks before the close: not a brace expansion.
                ' ' | '\t' | '\n' | ';' if at_top_level => return false,
                _ => {}
            }
            prev_char = Some(ch);
        }

        false
    }
3503
    /// Consumes a brace segment occurring mid-word (the opening `{` has
    /// already been consumed; brace depth starts at 1), capturing every
    /// character into `word` up to and including the matching `}`.
    ///
    /// Tracks escapes, all three quoting forms, and `$( ... )` parens so a
    /// `}` inside them does not end the segment. Stops at EOF if the brace
    /// never closes. Counterpart of `looks_like_mid_word_brace_segment`.
    fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
        let mut brace_depth = 1usize;
        let mut paren_depth = 0usize;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;

        while let Some(ch) = self.peek_char() {
            Self::push_capture_char(word, ch);
            self.advance();

            // An escaped character carries no structural meaning.
            if escaped {
                escaped = false;
                prev_char = Some(ch);
                continue;
            }

            match ch {
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // Parens count only inside `$( ... )` or right after `$`.
                '(' if !in_single
                    && !in_double
                    && !in_backtick
                    && (paren_depth > 0 || prev_char == Some('$')) =>
                {
                    paren_depth += 1
                }
                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
                    paren_depth -= 1
                }
                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    brace_depth -= 1;
                    if brace_depth == 0 {
                        break;
                    }
                }
                _ => {}
            }

            prev_char = Some(ch);
        }
    }
3551
3552 fn consume_brace_word_body(&mut self, word: &mut String) {
3553 let mut brace_depth = 1usize;
3554 let mut paren_depth = 0usize;
3555 let mut escaped = false;
3556 let mut in_single = false;
3557 let mut in_double = false;
3558 let mut in_backtick = false;
3559 let mut prev_char = None;
3560
3561 while let Some(ch) = self.peek_char() {
3562 word.push(ch);
3563 self.advance();
3564
3565 if escaped {
3566 escaped = false;
3567 prev_char = Some(ch);
3568 continue;
3569 }
3570
3571 match ch {
3572 '\\' if !in_single => escaped = true,
3573 '\'' if !in_double && !in_backtick => in_single = !in_single,
3574 '"' if !in_single && !in_backtick => in_double = !in_double,
3575 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3576 '(' if !in_single
3577 && !in_double
3578 && !in_backtick
3579 && (paren_depth > 0 || prev_char == Some('$')) =>
3580 {
3581 paren_depth += 1
3582 }
3583 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3584 paren_depth -= 1
3585 }
3586 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3587 '}' if !in_single && !in_double && !in_backtick => {
3588 brace_depth -= 1;
3589 if brace_depth == 0 {
3590 break;
3591 }
3592 }
3593 _ => {}
3594 }
3595
3596 prev_char = Some(ch);
3597 }
3598 }
3599
3600 fn looks_like_mid_word_brace_segment(&self) -> bool {
3603 const MAX_LOOKAHEAD: usize = 10_000;
3604
3605 let mut chars = self.lookahead_chars();
3606 if chars.next() != Some('{') {
3607 return false;
3608 }
3609
3610 let mut brace_depth = 1;
3611 let mut paren_depth = 0usize;
3612 let mut escaped = false;
3613 let mut in_single = false;
3614 let mut in_double = false;
3615 let mut in_backtick = false;
3616 let mut prev_char = None;
3617 let mut scanned = 0usize;
3618
3619 for ch in chars {
3620 scanned += 1;
3621 if scanned > MAX_LOOKAHEAD {
3622 return false;
3623 }
3624
3625 if !in_single
3626 && !in_double
3627 && !in_backtick
3628 && !escaped
3629 && brace_depth == 1
3630 && paren_depth == 0
3631 && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3632 {
3633 return false;
3634 }
3635
3636 if escaped {
3637 escaped = false;
3638 prev_char = Some(ch);
3639 continue;
3640 }
3641
3642 match ch {
3643 '\\' => escaped = true,
3644 '\'' if !in_double && !in_backtick => in_single = !in_single,
3645 '"' if !in_single && !in_backtick => in_double = !in_double,
3646 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3647 '(' if !in_single
3648 && !in_double
3649 && !in_backtick
3650 && (paren_depth > 0 || prev_char == Some('$')) =>
3651 {
3652 paren_depth += 1
3653 }
3654 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3655 paren_depth -= 1
3656 }
3657 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3658 '}' if !in_single && !in_double && !in_backtick => {
3659 brace_depth -= 1;
3660 if brace_depth == 0 {
3661 return true;
3662 }
3663 }
3664 _ => {}
3665 }
3666
3667 prev_char = Some(ch);
3668 }
3669
3670 false
3671 }
3672
3673 fn is_brace_group_start(&self) -> bool {
3675 let mut chars = self.lookahead_chars();
3676 if chars.next() != Some('{') {
3678 return false;
3679 }
3680 matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3682 }
3683
3684 fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3687 const MAX_LOOKAHEAD: usize = 10_000;
3688
3689 let mut chars = self.lookahead_chars();
3690 let mut depth = 1;
3691 let mut has_comma = false;
3692 let mut has_dot_dot = false;
3693 let mut prev_char = None;
3694 let mut scanned = 0usize;
3695
3696 for ch in chars.by_ref() {
3697 scanned += 1;
3698 if scanned > MAX_LOOKAHEAD {
3699 return false;
3700 }
3701 match ch {
3702 '{' => depth += 1,
3703 '}' => {
3704 depth -= 1;
3705 if depth == 0 {
3706 return has_comma || has_dot_dot;
3707 }
3708 }
3709 ',' if depth == 1 => has_comma = true,
3710 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3711 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3712 _ => {}
3713 }
3714 prev_char = Some(ch);
3715 }
3716
3717 false
3718 }
3719
3720 fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3721 let mut chars = self.lookahead_chars();
3722 if chars.next() != Some('{') {
3723 return false;
3724 }
3725 chars.next() == Some(')')
3726 }
3727
    /// Reads a word beginning with a literal `{` (not a brace expansion or
    /// brace group) into an owned `Word` token.
    ///
    /// Consumes the brace body via `consume_brace_word_body`, then any
    /// trailing word characters. When no reinjected characters are pending,
    /// the trailing run is taken in one chunk straight from the cursor as a
    /// fast path. Returns `None` if the cursor is not on `{`.
    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
        let mut word = String::with_capacity(16);

        if let Some('{') = self.peek_char() {
            word.push('{');
            self.advance();
        } else {
            return None;
        }

        self.consume_brace_word_body(&mut word);

        // Append any word characters that follow the closing brace.
        while let Some(ch) = self.peek_char() {
            if Self::is_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Fast path: bulk-consume directly from the cursor and
                    // account for the scanned bytes in one step.
                    let chunk = self.cursor.eat_while(Self::is_word_char);
                    word.push_str(chunk);
                    self.advance_scanned_source_bytes(chunk.len());
                } else {
                    // Reinjected characters must go through advance().
                    word.push(ch);
                    self.advance();
                }
            } else {
                break;
            }
        }

        Some(LexedToken::owned_word(TokenKind::Word, word))
    }
3758
3759 fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3761 let mut word = String::with_capacity(16);
3762
3763 if let Some('{') = self.peek_char() {
3765 word.push('{');
3766 self.advance();
3767 } else {
3768 return None;
3769 }
3770
3771 self.consume_brace_word_body(&mut word);
3773
3774 while let Some(ch) = self.peek_char() {
3776 if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3777 if ch == '{' {
3778 word.push(ch);
3780 self.advance();
3781 self.consume_brace_word_body(&mut word);
3782 } else {
3783 word.push(ch);
3784 self.advance();
3785 }
3786 } else {
3787 break;
3788 }
3789 }
3790
3791 Some(LexedToken::owned_word(TokenKind::Word, word))
3792 }
3793
3794 fn looks_like_assoc_assign(&self) -> bool {
3798 let mut chars = self.lookahead_chars();
3799 if chars.next() != Some('(') {
3801 return false;
3802 }
3803 for ch in chars {
3805 match ch {
3806 ' ' | '\t' => continue,
3807 '[' => return true,
3808 _ => return false,
3809 }
3810 }
3811 false
3812 }
3813
3814 fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3815 text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3816 }
3817
3818 fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3819 word.segments().any(|segment| {
3820 matches!(
3821 segment.kind(),
3822 LexedWordSegmentKind::SingleQuoted
3823 | LexedWordSegmentKind::DollarSingleQuoted
3824 | LexedWordSegmentKind::DoubleQuoted
3825 | LexedWordSegmentKind::DollarDoubleQuoted
3826 )
3827 }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3828 }
3829
3830 fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3831 text.contains(['*', '?'])
3832 || text.ends_with('}') && text.contains("${")
3833 || text.ends_with(']')
3834 && text
3835 .rfind('[')
3836 .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3837 }
3838
3839 fn is_word_char(ch: char) -> bool {
3840 !matches!(
3841 ch,
3842 ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3843 )
3844 }
3845
3846 const fn is_ascii_word_byte(byte: u8) -> bool {
3847 !matches!(
3848 byte,
3849 b' ' | b'\t'
3850 | b'\n'
3851 | b';'
3852 | b'|'
3853 | b'&'
3854 | b'>'
3855 | b'<'
3856 | b'('
3857 | b')'
3858 | b'{'
3859 | b'}'
3860 | b'\''
3861 | b'"'
3862 )
3863 }
3864
3865 const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3866 Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3867 }
3868
3869 fn is_plain_word_char(ch: char) -> bool {
3870 Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3871 }
3872
3873 pub fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3875 let mut content = String::with_capacity(64);
3876 let mut current_line = String::with_capacity(64);
3877
3878 let mut rest_of_line = String::with_capacity(32);
3885 let rest_of_line_start = self.current_position();
3886 let mut in_double_quote = false;
3887 let mut in_single_quote = false;
3888 let mut in_comment = false;
3889 let mut saw_non_whitespace_tail = false;
3890 let mut consecutive_backslashes = 0usize;
3891 let mut previous_tail_char = None;
3892 while let Some(ch) = self.peek_char() {
3893 self.advance();
3894 if in_comment {
3895 if ch == '\n' {
3896 break;
3897 }
3898 rest_of_line.push(ch);
3899 previous_tail_char = Some(ch);
3900 continue;
3901 }
3902 if ch == '#'
3903 && !in_single_quote
3904 && !in_double_quote
3905 && self.comments_enabled()
3906 && heredoc_tail_hash_starts_comment(previous_tail_char)
3907 {
3908 in_comment = true;
3909 rest_of_line.push(ch);
3910 previous_tail_char = Some(ch);
3911 consecutive_backslashes = 0;
3912 continue;
3913 }
3914 let backslash_continues_line = ch == '\\'
3915 && !in_single_quote
3916 && self.peek_char() == Some('\n')
3917 && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3918 && consecutive_backslashes.is_multiple_of(2);
3919 if backslash_continues_line {
3920 rest_of_line.push(ch);
3921 rest_of_line.push('\n');
3922 self.advance();
3923 consecutive_backslashes = 0;
3924 continue;
3925 }
3926 if ch == '\n' && !in_double_quote && !in_single_quote {
3927 break;
3928 }
3929 if ch == '"' && !in_single_quote {
3930 in_double_quote = !in_double_quote;
3931 } else if ch == '\'' && !in_double_quote {
3932 in_single_quote = !in_single_quote;
3933 } else if ch == '\\' && in_double_quote {
3934 rest_of_line.push(ch);
3936 if let Some(next) = self.peek_char() {
3937 rest_of_line.push(next);
3938 self.advance();
3939 }
3940 continue;
3941 }
3942 rest_of_line.push(ch);
3943 if !ch.is_whitespace() {
3944 saw_non_whitespace_tail = true;
3945 }
3946 if ch == '\\' && !in_single_quote {
3947 consecutive_backslashes += 1;
3948 } else {
3949 consecutive_backslashes = 0;
3950 }
3951 previous_tail_char = Some(ch);
3952 }
3953
3954 self.sync_offset_to_cursor();
3958 let content_start = self.current_position();
3959 let mut current_line_start = content_start;
3960 let content_end;
3961
3962 loop {
3964 if self.reinject_buf.is_empty() {
3965 self.sync_offset_to_cursor();
3971 let rest = self.cursor.rest();
3972 if rest.is_empty() {
3973 content_end = self.current_position();
3974 break;
3975 }
3976
3977 let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3978 let line = &rest[..line_len];
3979 let has_newline = line_len < rest.len();
3980
3981 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3982 content_end = current_line_start;
3983 self.consume_source_bytes(line_len);
3984 if has_newline {
3985 self.consume_ascii_chars(1);
3986 }
3987 break;
3988 }
3989
3990 content.push_str(line);
3991 self.consume_source_bytes(line_len);
3992
3993 if has_newline {
3994 self.consume_ascii_chars(1);
3995 content.push('\n');
3996 current_line_start = self.current_position();
3997 continue;
3998 }
3999
4000 content_end = self.current_position();
4001 break;
4002 }
4003
4004 match self.peek_char() {
4005 Some('\n') => {
4006 self.advance();
4007 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4009 content_end = current_line_start;
4010 break;
4011 }
4012 content.push_str(¤t_line);
4013 content.push('\n');
4014 current_line.clear();
4015 current_line_start = self.current_position();
4016 }
4017 Some(ch) => {
4018 current_line.push(ch);
4019 self.advance();
4020 }
4021 None => {
4022 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4024 content_end = current_line_start;
4025 break;
4026 }
4027 if !current_line.is_empty() {
4028 content.push_str(¤t_line);
4029 }
4030 content_end = self.current_position();
4031 break;
4032 }
4033 }
4034 }
4035
4036 let post_heredoc_offset = self.offset;
4041 self.offset = rest_of_line_start.offset;
4042 for ch in rest_of_line.chars() {
4043 self.reinject_buf.push_back(ch);
4044 }
4045 self.reinject_buf.push_back('\n');
4046 self.reinject_resume_offset = Some(post_heredoc_offset);
4047
4048 HeredocRead {
4049 content,
4050 content_span: Span::from_positions(content_start, content_end),
4051 }
4052 }
4053
4054 fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4055 let mut chars = self.cursor.rest().chars();
4056 if chars.next() != Some('\n') {
4057 return false;
4058 }
4059
4060 for ch in chars {
4061 if matches!(ch, ' ' | '\t') {
4062 continue;
4063 }
4064 if ch == '\n' {
4065 return false;
4066 }
4067 return matches!(ch, '|' | '&' | ';' | '<' | '>')
4068 || (ch == '#' && self.comments_enabled());
4069 }
4070
4071 false
4072 }
4073}
4074
/// Returns true when `line` terminates a heredoc with the given `delimiter`.
///
/// With `strip_tabs` (the `<<-` form) leading tabs are ignored; trailing
/// spaces and tabs after the delimiter are tolerated.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    match candidate.strip_prefix(delimiter) {
        // An exact match leaves an empty remainder, which trivially passes.
        Some(rest) => rest.chars().all(|ch| matches!(ch, ' ' | '\t')),
        None => false,
    }
}
4092
/// Whether a `#` in the heredoc operator tail begins a comment, judged by
/// the character that precedes it (start of tail, whitespace, or an
/// operator character).
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(prev) => {
            prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')')
        }
    }
}
4098
/// Decodes the char starting at byte `index`, returning it together with the
/// index of the following boundary. Yields `None` at end of input or when
/// `index` is not a char boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    let tail = input.get(index..)?;
    let ch = tail.chars().next()?;
    Some((ch, index + ch.len_utf8()))
}
4103
/// Scans `prefix` (the current line up to some index) and reports whether it
/// contains a `((` pair that has not yet been closed by a matching `))`,
/// ignoring parens inside single/double quotes, backticks, or after a
/// backslash escape.
fn line_has_unclosed_double_paren(prefix: &str) -> bool {
    let mut index = 0usize;
    // Nesting depth of `((`/`))` pairs seen so far.
    let mut depth = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_backtick = false;
    let mut escaped = false;

    while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            // Toggling (rather than setting) means `\\` escapes nothing.
            escaped = !escaped;
            index = next_index;
            continue;
        }
        escaped = false;

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
            '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
            '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
            // Only a *double* paren affects depth; both characters are
            // consumed at once so `((` is not also counted as `(`.
            '(' if !in_single
                && !in_double
                && !in_backtick
                && !was_escaped
                && prefix[next_index..].starts_with('(') =>
            {
                depth += 1;
                index = next_index + '('.len_utf8();
                continue;
            }
            ')' if !in_single
                && !in_double
                && !in_backtick
                && !was_escaped
                && prefix[next_index..].starts_with(')') =>
            {
                depth = depth.saturating_sub(1);
                index = next_index + ')'.len_utf8();
                continue;
            }
            _ => {}
        }

        index = next_index;
    }

    depth > 0
}
4153
4154fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4155 let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4156 let prefix = &input[line_start..index];
4157 line_has_unclosed_double_paren(prefix)
4158}
4159
4160fn hash_starts_comment(input: &str, index: usize) -> bool {
4161 if inside_unclosed_double_paren_on_line(input, index) {
4162 return false;
4163 }
4164
4165 let next = &input[index + '#'.len_utf8()..];
4166 input[..index]
4167 .chars()
4168 .next_back()
4169 .is_none_or(|prev| match prev {
4170 '(' => {
4171 let whitespace_index = next.find(char::is_whitespace);
4172 let close_index = next.find(')');
4173
4174 match (whitespace_index, close_index) {
4175 (Some(whitespace), Some(close)) => whitespace < close,
4176 (Some(_), None) | (None, None) => true,
4177 (None, Some(_)) => false,
4178 }
4179 }
4180 _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4181 })
4182}
4183
/// Whether `ch` terminates a heredoc delimiter word. Quoted or escaped
/// characters never terminate it.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }
    ch.is_whitespace() || matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')')
}
4195
/// Scans the remainder of a double-quoted segment inside a scanned command
/// substitution, starting just past the opening `"`. Returns the index just
/// past the closing `"`, or `None` when the segment is unterminated.
///
/// Nested `${...}` and `$(...)` are skipped as whole units so quotes inside
/// them cannot terminate this segment.
fn scan_double_quoted_command_substitution_segment(
    input: &str,
    mut index: usize,
    subst_depth: usize,
) -> Option<usize> {
    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            '"' => return Some(next_index),
            '\\' => {
                // Skip the escaped character (if any) along with the backslash.
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
            }
            '$' if input[next_index..].starts_with('{') => {
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
            }
            // `$(` but not `$((` opens a nested command substitution.
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
            }
            _ => index = next_index,
        }
    }

    None
}
4232
/// Scans a `${...}` parameter-expansion body starting just past `${`,
/// returning the index just past the matching unquoted `}`, or `None` when
/// the expansion is unterminated.
///
/// Tracks single/double/ANSI-C (`$'...'`) quoting and backticks so a `}`
/// inside quotes does not close the expansion; nested `${...}`, `$(...)`,
/// and `<(...)`/`>(...)` forms are skipped as whole units.
fn scan_command_subst_parameter_expansion_len(input: &str, subst_depth: usize) -> Option<usize> {
    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True right after an unquoted `$`, so a following `'` opens `$'...'`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            // Toggling (rather than setting) means `\\` escapes nothing.
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // Nested `${...}` is skipped recursively.
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // `$(` (but not `$((`) is a nested command substitution.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(...)` / `>(...)` process substitution is skipped as a unit.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    // This quote directly followed `$`, so it opens `$'...'`.
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // An unquoted, unescaped `}` terminates the expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4332
/// Parses the heredoc delimiter word that follows `<<`/`<<-` inside a
/// scanned command substitution.
///
/// Skips leading blanks, then accumulates the "cooked" delimiter — with
/// surrounding quotes and escaping backslashes removed, as it must appear in
/// the body — up to the first terminator character. Returns
/// `(index just past the word, cooked delimiter)`, or `None` when no
/// delimiter characters were consumed.
fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        if !matches!(ch, ' ' | '\t') {
            break;
        }
        index = next_index;
    }

    let start = index;
    let mut cooked = String::new();
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
            break;
        }

        index = next_index;
        if escaped {
            // The previously-seen backslash makes this char literal.
            cooked.push(ch);
            escaped = false;
            continue;
        }

        match ch {
            // Quote and escape characters shape the word but are not part of
            // the cooked delimiter text.
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            _ => cooked.push(ch),
        }
    }

    (index > start).then_some((index, cooked))
}
4369
4370fn skip_command_subst_pending_heredoc(
4371 input: &str,
4372 mut index: usize,
4373 delimiter: &str,
4374 strip_tabs: bool,
4375) -> usize {
4376 while index <= input.len() {
4377 let rest = &input[index..];
4378 let line_len = rest.find('\n').unwrap_or(rest.len());
4379 let line = &rest[..line_len];
4380 let has_newline = line_len < rest.len();
4381
4382 index += line_len;
4383 if has_newline {
4384 index += '\n'.len_utf8();
4385 }
4386
4387 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4388 return index;
4389 }
4390 }
4391
4392 index
4393}
4394
4395fn scan_command_subst_ansi_c_single_quoted_segment(
4396 input: &str,
4397 quote_index: usize,
4398) -> Option<usize> {
4399 let mut index = quote_index + '\''.len_utf8();
4400
4401 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4402 index = next_index;
4403 if ch == '\\' {
4404 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4405 index = escaped_next;
4406 }
4407 continue;
4408 }
4409
4410 if ch == '\'' {
4411 return Some(index);
4412 }
4413 }
4414
4415 None
4416}
4417
4418fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4419 let mut index = start;
4420
4421 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4422 index = next_index;
4423 if ch == '\\' {
4424 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4425 index = escaped_next;
4426 }
4427 continue;
4428 }
4429
4430 if ch == '`' {
4431 return Some(index);
4432 }
4433 }
4434
4435 None
4436}
4437
4438fn flush_scanned_command_subst_keyword(
4439 current_word: &mut String,
4440 pending_case_headers: &mut usize,
4441 case_clause_depths: &mut SmallVec<[usize; 4]>,
4442 depth: usize,
4443 word_started_at_command_start: &mut bool,
4444) {
4445 if current_word.is_empty() {
4446 *word_started_at_command_start = false;
4447 return;
4448 }
4449
4450 match current_word.as_str() {
4451 "case" if *word_started_at_command_start => *pending_case_headers += 1,
4452 "in" if *pending_case_headers > 0 => {
4453 *pending_case_headers -= 1;
4454 case_clause_depths.push(depth);
4455 }
4456 "esac" if *word_started_at_command_start => {
4457 case_clause_depths.pop();
4458 }
4459 _ => {}
4460 }
4461
4462 current_word.clear();
4463 *word_started_at_command_start = false;
4464}
4465
/// Scans the body of a `$( ... )` command substitution starting just past
/// the opening `(`, returning the byte length up to and including the
/// matching `)`, or `None` when unterminated or nested too deeply.
///
/// Tracks paren depth, quoting, comments, heredocs, and enough word state to
/// tell a `case`-clause pattern `)` (which does not close the substitution)
/// from a real closing paren.
fn scan_command_substitution_body_len_inner(input: &str, subst_depth: usize) -> Option<usize> {
    // Guard against pathological nesting blowing the stack.
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    // Paren depth; the opening `(` of this substitution counts as 1.
    let mut depth = 1;
    // Heredocs whose bodies start at the next newline: (delimiter, strip_tabs).
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    // `case` headers seen whose `in` has not yet arrived.
    let mut pending_case_headers = 0usize;
    // Paren depths at which a `case ... in` clause list is open.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Comment: skip to end of line, then consume any pending heredocs.
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // A `)` ending a case pattern at this depth does not close
                // a paren group.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                if depth == 0 {
                    // This is the substitution's own closing paren.
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                // Skip to the closing single quote (no escapes inside).
                index = next_index;
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // `$'...'` ANSI-C quoted string.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                // Skip the escaped character, if any.
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '>' => {
                // A purely-numeric word before `>` is a file descriptor, not
                // a command name.
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            // `<<` heredoc, `<<<` here-string, or `<<` inside arithmetic.
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // Inside `(( ... ))`, `<<` is the shift operator.
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<` is a here-string: its target is a word, not a heredoc.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                // Heredoc bodies begin on the line after their operators.
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(` (but not `$((` arithmetic).
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            _ => {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    // Accumulate a word; remember whether it began a command.
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        ' ' | '\t' => {}
                        // Command separators start a fresh command.
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    None
}
4819
/// Scans the body of a `$( ... )` command substitution starting just past
/// the opening `(`, returning the byte length up to and including the
/// matching `)`, or `None` when unterminated (or nested too deeply).
pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
    scan_command_substitution_body_len_inner(input, 0)
}
4823
4824#[cfg(test)]
4825mod tests {
4826 use super::*;
4827
4828 fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4829 match token.kind {
4830 kind if kind.is_word_like() => token.word_string(),
4831 TokenKind::Comment => token
4832 .span
4833 .slice(source)
4834 .strip_prefix('#')
4835 .map(str::to_string),
4836 TokenKind::Error => token
4837 .error_kind()
4838 .map(LexerErrorKind::message)
4839 .map(str::to_string),
4840 _ => None,
4841 }
4842 }
4843
4844 fn assert_next_token(
4845 lexer: &mut Lexer<'_>,
4846 expected_kind: TokenKind,
4847 expected_text: Option<&str>,
4848 ) {
4849 let token = lexer.next_lexed_token().unwrap();
4850 assert_eq!(token.kind, expected_kind);
4851 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4852 }
4853
4854 fn assert_next_token_with_comments(
4855 lexer: &mut Lexer<'_>,
4856 expected_kind: TokenKind,
4857 expected_text: Option<&str>,
4858 ) {
4859 let token = lexer.next_lexed_token_with_comments().unwrap();
4860 assert_eq!(token.kind, expected_kind);
4861 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4862 }
4863
4864 fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4865 let mut lexer = Lexer::new(input);
4866
4867 while let Some(token) = lexer.next_lexed_token() {
4868 if token.kind == TokenKind::Newline {
4869 continue;
4870 }
4871
4872 assert_eq!(
4873 token.span.start.line, token.span.end.line,
4874 "token should stay on one line: {:?}",
4875 token
4876 );
4877 }
4878 }
4879
4880 #[test]
4881 fn test_simple_words() {
4882 let mut lexer = Lexer::new("echo hello world");
4883
4884 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4885 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
4886 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
4887 assert!(lexer.next_lexed_token().is_none());
4888 }
4889
4890 #[test]
4891 fn test_single_quoted_string() {
4892 let mut lexer = Lexer::new("echo 'hello world'");
4893
4894 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4895 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
4897 assert!(lexer.next_lexed_token().is_none());
4898 }
4899
4900 #[test]
4901 fn test_double_quoted_string() {
4902 let mut lexer = Lexer::new("echo \"hello world\"");
4903
4904 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4905 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
4906 assert!(lexer.next_lexed_token().is_none());
4907 }
4908
4909 #[test]
4910 fn test_brace_expansion_token_ignores_quoted_closers() {
4911 let mut lexer = Lexer::new("echo {\"}\",a}\n");
4912
4913 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4914 assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
4915 assert_next_token(&mut lexer, TokenKind::Newline, None);
4916 assert!(lexer.next_lexed_token().is_none());
4917 }
4918
4919 #[test]
4920 fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
4921 let mut lexer = Lexer::new("echo {'a\\',b} next\n");
4922
4923 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4924 assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
4925 assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
4926 assert_next_token(&mut lexer, TokenKind::Newline, None);
4927 assert!(lexer.next_lexed_token().is_none());
4928 }
4929
    /// A double-quoted expansion keeps its text backed by the input source:
    /// the single segment's span must slice back to the original bytes.
    #[test]
    fn test_double_quoted_expansion_token_keeps_source_backing() {
        let source = r#""$bar""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        // The surrounding quotes are stripped from the reported word text.
        assert_eq!(token.word_text(), Some("$bar"));

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        // The segment span still addresses the original source text.
        assert_eq!(segment.span().unwrap().slice(source), "$bar");
    }

    /// A command substitution inside double quotes may itself contain quoted
    /// words and a pipeline; the whole `$( ... )` must stay one token.
    #[test]
    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
        );
    }

    /// Same shape, but with a braced `${@}` expansion piped through `tr`.
    #[test]
    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
        );
    }
4970
    /// A word built from plain, double-quoted, and single-quoted runs keeps
    /// one segment per run, each tagged with its quoting kind, and the first
    /// segment's span still slices back to the original source.
    #[test]
    fn test_mixed_word_keeps_segment_kinds() {
        let source = r#"foo"bar"'baz'"#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::Plain, "foo".to_string()),
                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
            ]
        );
        // Joined text concatenates the segments with all quoting removed.
        assert_eq!(word.joined_text(), "foobarbaz");
        assert_eq!(
            word.segments()
                .next()
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "foo"
        );
    }
5003
5004 #[test]
5005 fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
5006 let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";
5007
5008 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5009 let body = &source[..consumed];
5010
5011 assert!(body.contains("field, direction"));
5012 assert!(body.ends_with(')'));
5013 }
5014
5015 #[test]
5016 fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
5017 let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";
5018
5019 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5020 let body = &source[..consumed];
5021
5022 assert!(body.contains("printf '%s' y"));
5023 assert!(body.ends_with(')'));
5024 }
5025
5026 #[test]
5027 fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
5028 let source = " (# comment with )\nprintf %s 1,2\n) )\"";
5029
5030 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5031 let body = &source[..consumed];
5032
5033 assert!(body.contains("printf %s 1,2"));
5034 assert!(body.ends_with(')'));
5035 }
5036
5037 #[test]
5038 fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
5039 let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";
5040
5041 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5042 let body = &source[..consumed];
5043
5044 assert!(body.contains("field, direction"));
5045 assert!(body.ends_with(')'));
5046 }
5047
5048 #[test]
5049 fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
5050 let source = "printf %s ${x//foo/)},1)\"";
5051
5052 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5053 let body = &source[..consumed];
5054
5055 assert!(body.contains("${x//foo/)},1"));
5056 assert!(body.ends_with(')'));
5057 }
5058
5059 #[test]
5060 fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
5061 let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";
5062
5063 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5064 let body = &source[..consumed];
5065
5066 assert!(body.contains("printf %s 1,2"));
5067 assert!(body.ends_with(')'));
5068 }
5069
5070 #[test]
5071 fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
5072 let source = "[[ \"$buf\" == (#b)(*) ]]";
5073 let index = source.find('#').expect("expected hash");
5074
5075 assert!(!hash_starts_comment(source, index));
5076 }
5077
5078 #[test]
5079 fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
5080 let source = "(#comment with )";
5081 let index = source.find('#').expect("expected hash");
5082
5083 assert!(hash_starts_comment(source, index));
5084 }
5085
5086 #[test]
5087 fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
5088 let source = "(( #c < 256 ))";
5089 let index = source.find('#').expect("expected hash");
5090
5091 assert!(!hash_starts_comment(source, index));
5092 }
5093
5094 #[test]
5095 fn test_hash_starts_comment_respects_quoted_double_parens() {
5096 let source = "printf '((' # comment";
5097 let index = source.find('#').expect("expected hash");
5098
5099 assert!(hash_starts_comment(source, index));
5100 }
5101
    /// `'(('` is quoted text, so the following `#` does start a comment and
    /// the `)` inside it is ignored; scanning continues to the real closer.
    #[test]
    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    /// `(#comment` with no space after `#` still comments out its `)`.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
        let source = " (#comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    /// `<<` inside `(( ... ))` is an arithmetic shift, not a heredoc opener.
    #[test]
    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
        let source = "((x<<2))\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    /// A case arm's bare `)` inside a nested subshell must not close the
    /// substitution early; the body ends at the outer `))`.
    #[test]
    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with("))"));
    }

    /// `case`/`in` used as ordinary argument words must not switch the
    /// scanner into case-pattern mode.
    #[test]
    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
        let source = "printf %s 1,2; echo case in)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("echo case in"));
        assert!(body.ends_with(')'));
    }

    /// In `$'a\'b'` the escaped quote stays inside the ANSI-C string, so the
    /// string does not end early.
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("$'a\\'b'"));
        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    /// A `)` inside backticks belongs to the backtick substitution, not the
    /// surrounding body.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
        let source = "printf %s `echo foo)`; printf %s ok)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("`echo foo)`"));
        assert!(body.contains("printf %s ok"));
        assert!(body.ends_with(')'));
    }

    /// Backticks nested inside `${...}`: their `}`/`)` stay part of the
    /// parameter expansion.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
        let source = "printf %s ${x/`echo }`/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/`echo }`/foo)},1"));
        assert!(body.ends_with(')'));
    }
5191
5192 #[test]
5193 fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5194 {
5195 let source = "printf %s ${x/<(echo })/foo)},1)\"";
5196
5197 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5198 let body = &source[..consumed];
5199
5200 assert!(body.contains("${x/<(echo })/foo)},1"));
5201 assert!(body.ends_with(')'));
5202 }
5203
5204 #[test]
5205 fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5206 let source = "printf %s 1,2; echo case in)";
5207
5208 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5209 let body = &source[..consumed];
5210
5211 assert_eq!(body, source);
5212 }
5213
5214 #[test]
5215 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5216 let source = "printf %s $'a\\'b'; printf %s 1,2)";
5217
5218 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5219 let body = &source[..consumed];
5220
5221 assert_eq!(body, source);
5222 }
5223
5224 #[test]
5225 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5226 let source = "printf %s `echo foo)`; printf %s ok)";
5227
5228 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5229 let body = &source[..consumed];
5230
5231 assert_eq!(body, source);
5232 }
5233
5234 #[test]
5235 fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5236 let source = "echo \"$line\" | cut -d' ' -f2-)";
5237
5238 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5239 let body = &source[..consumed];
5240
5241 assert_eq!(body, source);
5242 }
5243
5244 #[test]
5245 fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5246 let source = "echo \"${@}\" | tr -d '[:space:]')";
5247
5248 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5249 let body = &source[..consumed];
5250
5251 assert_eq!(body, source);
5252 }
5253
5254 #[test]
5255 fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5256 let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5257
5258 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5259 let body = &source[..consumed];
5260
5261 assert_eq!(body, source);
5262 }
5263
5264 #[test]
5265 fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5266 let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5267
5268 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5269 let body = &source[..consumed];
5270
5271 assert_eq!(body, source);
5272 }
5273
    /// A `$(printf ')')` nested inside `$(( ... ))` inside `$( ... )`: the
    /// quoted `)` must not close any of the surrounding substitutions.
    #[test]
    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
        let mut lexer = Lexer::new(source);

        let first = lexer.next_lexed_token().expect("expected first token");
        assert!(first.kind.is_word_like(), "{:?}", first.kind);
        assert_eq!(first.word_string().as_deref(), Some("echo"));

        // The entire nested substitution stays one word-like token.
        let second = lexer.next_lexed_token().expect("expected second token");
        assert!(second.kind.is_word_like(), "{:?}", second.kind);
        assert_eq!(
            second.word_string().as_deref(),
            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
        );
    }

    /// Escaped `\"` quotes before the substitution must not flip the quote
    /// state the scanner sees for the tail after `$(`.
    #[test]
    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
        // Scan only the body: everything after the `$(` opener.
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
    }

    /// A nested `$( ... )` (including its command name) is kept inside the
    /// outer body instead of the scan stopping at the inner closer.
    #[test]
    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
        let source = "echo $(echo $(basename $filename .fuzz))";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(
            &source[start..start + consumed],
            "echo $(basename $filename .fuzz))"
        );
    }

    /// Quoted nesting inside a `[[ ... ]] && echo "$(basename ...)"` line
    /// must not terminate the scan before the final `)` at end of input.
    #[test]
    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
        let source = "\n [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n )";
        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        assert_eq!(consumed, source.len());
    }
5318
    /// `'foo'bar` lexes as one word of two segments: a single-quoted prefix
    /// and a plain continuation whose span still slices the original source.
    #[test]
    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
        let source = "'foo'bar";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::LiteralWord);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
                (LexedWordSegmentKind::Plain, "bar".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobar");
        // The second (plain) segment keeps a span into the original input.
        assert_eq!(
            word.segments()
                .nth(1)
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "bar"
        );
    }

    /// An unquoted `$( ... )` word stays a single plain segment whose span
    /// covers the whole input.
    #[test]
    fn test_unquoted_command_substitution_word_keeps_source_backing() {
        let source = "$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }

    /// An unquoted `${...}` with a nested `${#...}` stays one plain segment
    /// backed by the source.
    #[test]
    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
        let source = "${arr[$RANDOM % ${#arr[@]}]}";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }

    /// A quoted prefix followed by an unquoted `$( ... )` continuation: the
    /// continuation segment keeps its span into the original source.
    #[test]
    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
        let source = "\"foo\"$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let continuation = word.segments().nth(1).unwrap();
        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(continuation.as_str(), "$(printf hi)");
        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
    }

    /// A double-quoted nested parameter expansion keeps one `DoubleQuoted`
    /// segment whose span excludes only the surrounding quotes.
    #[test]
    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
        assert_eq!(
            segment.span().unwrap().slice(source),
            "${arr[$RANDOM % ${#arr[@]}]}"
        );
    }
5413
    /// In `$'\c''` the control escape consumes the next character (the
    /// closing quote), cooking to the 0x07 byte asserted below.
    #[test]
    fn test_ansi_c_control_escape_can_consume_quote() {
        let mut lexer = Lexer::new("echo $'\\c''");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
        assert!(lexer.next_lexed_token().is_none());
    }

    /// `"${var//'"'/'\"'}"` replaces `"` characters; the whole assignment
    /// must stay one word (the `\"` cooks down to a bare `"`).
    #[test]
    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
        let source = r#"out_line="${out_line//'"'/'\"'}"
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    /// The same replacement must not unbalance quote state and swallow the
    /// commands (and heredoc) on the following lines.
    #[test]
    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
        let source = r#"out_line="${out_line//'"'/'\"'}"
echo "Error: Missing python3!"
cat << 'EOF' > "${pywrapper}"
import os
EOF
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("Error: Missing python3!"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
    }

    /// Shell text `crypt=${crypt//\\/\\\\}`: the raw span covers the escaped
    /// source, the cooked word text halves the backslashes, and therefore
    /// `source_slice` is `None` (cooked text differs from the raw bytes).
    #[test]
    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
        assert!(token.source_slice(source).is_none());
        assert_eq!(
            token.word_string().as_deref(),
            Some("crypt=${crypt//\\/\\\\}")
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5484
    /// A literal `{` inside a `${response#*{...}` trim pattern must not open
    /// a brace group, or the rest of the function body (`fi`, `}`) would be
    /// swallowed into the word.
    #[test]
    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n if true; then\n txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n fi\n}\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        // The whole assignment is one word; `\"` cooks down to `"`.
        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(
                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
            ),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5521
5522 #[test]
5523 fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
5524 let source = "case \"$word\" in\n {) : ;;\n :) : ;;\nesac\n";
5525 let mut lexer = Lexer::new(source);
5526
5527 assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
5528 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
5529 assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
5530 assert_next_token(&mut lexer, TokenKind::Newline, None);
5531 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
5532 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5533 assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5534 assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5535 assert_next_token(&mut lexer, TokenKind::Newline, None);
5536 assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5537 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5538 assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5539 assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5540 assert_next_token(&mut lexer, TokenKind::Newline, None);
5541 assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
5542 assert_next_token(&mut lexer, TokenKind::Newline, None);
5543 assert!(lexer.next_lexed_token().is_none());
5544 }
5545
5546 #[test]
5547 fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5548 let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5549 let mut lexer = Lexer::new(source);
5550
5551 assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5552 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5553 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5554 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5555 assert_next_token(&mut lexer, TokenKind::And, None);
5556 assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5557 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5558 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5559 assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5560 assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5561 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5562 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5563 assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5564 assert_next_token(&mut lexer, TokenKind::Newline, None);
5565 assert!(lexer.next_lexed_token().is_none());
5566 }
5567
5568 #[test]
5569 fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5570 let source = "echo -{$(echo a),b}-\n";
5571 let mut lexer = Lexer::new(source);
5572
5573 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5574 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5575 assert_next_token(&mut lexer, TokenKind::Newline, None);
5576 assert!(lexer.next_lexed_token().is_none());
5577 }
5578
5579 #[test]
5580 fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5581 let source = "echo -{$((1 + 2)),b}-\n";
5582 let mut lexer = Lexer::new(source);
5583
5584 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5585 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5586 assert_next_token(&mut lexer, TokenKind::Newline, None);
5587 assert!(lexer.next_lexed_token().is_none());
5588 }
5589
5590 #[test]
5591 fn test_operators() {
5592 let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5593
5594 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5595 assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5596 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5597 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5598 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5599 assert_next_token(&mut lexer, TokenKind::And, None);
5600 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5601 assert_next_token(&mut lexer, TokenKind::Or, None);
5602 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5603 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5604 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5605 assert_next_token(&mut lexer, TokenKind::Background, None);
5606 assert!(lexer.next_lexed_token().is_none());
5607 }
5608
5609 #[test]
5610 fn test_double_left_bracket_requires_separator() {
5611 let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5612
5613 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5614 assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5615 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5616 assert_next_token(&mut lexer, TokenKind::Newline, None);
5617 assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5618 assert_next_token(&mut lexer, TokenKind::Newline, None);
5619 assert!(lexer.next_lexed_token().is_none());
5620 }
5621
    /// Every redirect operator maps to its own token kind; the target words
    /// in between stay ordinary words.
    #[test]
    fn test_redirects() {
        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");

        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
        // `>>|` lexes the same as a plain append.
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
        // `2>|` is a clobber carrying fd 2 on the token, with no word text.
        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Clobber);
        assert_eq!(token.fd_value(), Some(2));
        assert_eq!(token_text(&token, lexer.input), None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
        assert_next_token(&mut lexer, TokenKind::HereString, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
    }
5651
5652 #[test]
5653 fn test_comment() {
5654 let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5655
5656 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5657 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5658 assert_next_token(&mut lexer, TokenKind::Newline, None);
5659 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5660 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5661 }
5662
    /// `next_lexed_token_with_comments` surfaces comment tokens, carrying the
    /// text after `#` and a 1-based line/column span.
    #[test]
    fn test_comment_token_with_span() {
        let mut lexer = Lexer::new("# lead\necho hi # tail");

        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        // Comment text excludes the `#` itself but keeps the leading space.
        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
        assert_eq!(comment.span.start.line, 1);
        assert_eq!(comment.span.start.column, 1);
        assert_eq!(comment.span.end.line, 1);
        assert_eq!(comment.span.end.column, 7);

        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));

        // The inline comment on line 2 starts at its `#` (column 9).
        let inline = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(inline.kind, TokenKind::Comment);
        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
        assert_eq!(inline.span.start.line, 2);
        assert_eq!(inline.span.start.column, 9);
    }
5685
5686 #[test]
5687 fn test_comment_token_preserves_hash_boundaries() {
5688 let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5689
5690 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5691 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5692 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5693 assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5694 assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5695 assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5696 assert!(lexer.next_lexed_token_with_comments().is_none());
5697 }
5698
5699 #[test]
5700 fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5701 let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5702
5703 let mut saw_comment = false;
5704 while let Some(token) = lexer.next_lexed_token_with_comments() {
5705 if token.kind == TokenKind::Comment {
5706 saw_comment = true;
5707 break;
5708 }
5709 }
5710
5711 assert!(
5712 !saw_comment,
5713 "zsh inline glob controls inside [[ ]] should not lex as comments"
5714 );
5715 }
5716
5717 #[test]
5718 fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5719 let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5720
5721 let mut saw_comment = false;
5722 while let Some(token) = lexer.next_lexed_token_with_comments() {
5723 if token.kind == TokenKind::Comment {
5724 saw_comment = true;
5725 break;
5726 }
5727 }
5728
5729 assert!(
5730 !saw_comment,
5731 "zsh arithmetic char literals inside (( )) should not lex as comments"
5732 );
5733 }
5734
    /// A zsh `${1//(#m)[...]/...}` replacement whose bracket class embeds
    /// both quote characters must still lex as a single double-quoted word.
    #[test]
    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
        let mut lexer = Lexer::new(
            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        // The entire replacement expansion is one quoted word.
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }

    /// The same replacement word inside an anonymous `() { ... }` function:
    /// the closing `}` of the function body must still lex as `RightBrace`.
    #[test]
    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
        let mut lexer = Lexer::new(
            "() {\n builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5783
5784 #[test]
5785 fn test_variable_words() {
5786 let mut lexer = Lexer::new("echo $HOME $USER");
5787
5788 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5789 assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5790 assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5791 assert!(lexer.next_lexed_token().is_none());
5792 }
5793
5794 #[test]
5795 fn test_pipeline_tokens() {
5796 let mut lexer = Lexer::new("echo hello | cat");
5797
5798 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5799 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5800 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5801 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5802 assert!(lexer.next_lexed_token().is_none());
5803 }
5804
5805 #[test]
5806 fn test_read_heredoc() {
5807 let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5809 let content = lexer.read_heredoc("EOF", false);
5810 assert_eq!(content.content, "hello\nworld\n");
5811 }
5812
5813 #[test]
5814 fn test_read_heredoc_single_line() {
5815 let mut lexer = Lexer::new("\ntest\nEOF");
5816 let content = lexer.read_heredoc("EOF", false);
5817 assert_eq!(content.content, "test\n");
5818 }
5819
5820 #[test]
5821 fn test_read_heredoc_full_scenario() {
5822 let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5824
5825 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5827 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5828 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5829
5830 let content = lexer.read_heredoc("EOF", false);
5832 assert_eq!(content.content, "hello\nworld\n");
5833 }
5834
5835 #[test]
5836 fn test_read_heredoc_with_redirect() {
5837 let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5839 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5840 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5841 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5842 let content = lexer.read_heredoc("EOF", false);
5843 assert_eq!(content.content, "hello\n");
5844 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5846 assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5847 }
5848
    #[test]
    fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
        // The `\`-continued pipeline tail after `<<EOF` spans three physical
        // lines; it must be buffered and re-lexed after the heredoc body.
        let source = "cat <<EOF | grep hello \\\n | sort \\\n > out.txt\nhello\nEOF\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "hello\n");

        // The full continued tail resumes here, in order.
        assert_next_token(&mut lexer, TokenKind::Pipe, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
        assert_next_token(&mut lexer, TokenKind::Pipe, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
    }
5869
    #[test]
    fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
        // A lone `\` right after the delimiter must not cause the following
        // body lines to be treated as a continued command tail; the body is
        // still `1\n2\n3\n`.
        let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "1\n2\n3\n");
    }
5882
    #[test]
    fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
        // `foo\\` ends with an *escaped* backslash, not a line continuation,
        // so the header tail stops at the newline and the body is `body\n`.
        let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "body\n");
    }
5895
    #[test]
    fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
        // A trailing `\` inside a `#` comment on the heredoc header line is
        // inert: the body must still start on the next line.
        let source = "cat <<EOF # note \\\nbody\nEOF\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "body\n");
    }
5908
    #[test]
    fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
        // Same as above, but the comment starts right after a subshell `)`.
        // The backslash in the comment must not continue the tail, and the
        // `)` token must still be produced after the heredoc body.
        let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "body\n");

        assert_next_token(&mut lexer, TokenKind::RightParen, None);
    }
5924
    #[test]
    fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
        // With only whitespace before the `\`, the continuation pulls the
        // operator-led `| tac` line into the command tail; the heredoc body is
        // just `1\n` and the pipe tokens are replayed after it.
        let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "1\n");

        assert_next_token(&mut lexer, TokenKind::Pipe, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
    }
5940
    #[test]
    fn test_read_heredoc_with_redirect_preserves_following_spans() {
        // After consuming a heredoc, the spans of subsequent tokens (redirect,
        // target word, newline, trailing comment) must still slice the
        // original source correctly — no offset drift from the replay buffer.
        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "hello\n");

        let redirect = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(redirect.kind, TokenKind::RedirectOut);
        assert_eq!(redirect.span.slice(source), ">");

        let target = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(target.kind, TokenKind::Word);
        assert_eq!(
            token_text(&target, lexer.input).as_deref(),
            Some("file.txt")
        );
        assert_eq!(target.span.slice(source), "file.txt");

        let newline = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(newline.kind, TokenKind::Newline);
        assert_eq!(newline.span.slice(source), "\n");

        // Comment text excludes the `#` marker; its span includes it.
        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
        assert_eq!(comment.span.slice(source), "# done");
    }
5974
    #[test]
    fn test_comment_with_unicode() {
        // Comment spans over multi-byte characters must cover the whole text
        // and land on UTF-8 char boundaries.
        let source = "# café résumé\necho ok";
        let mut lexer = Lexer::new(source);

        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        assert_eq!(
            token_text(&comment, lexer.input).as_deref(),
            Some(" café résumé")
        );
        let start = comment.span.start.offset;
        let end = comment.span.end.offset;
        assert_eq!(start, 0);
        assert_eq!(&source[start..end], "# café résumé");
        assert!(source.is_char_boundary(start));
        assert!(source.is_char_boundary(end));

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
    }
5998
    #[test]
    fn test_comment_with_cjk_characters() {
        // Same boundary checks as the Latin-accent test, but with three-byte
        // CJK characters.
        let source = "# 你好世界\necho ok";
        let mut lexer = Lexer::new(source);

        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        assert_eq!(
            token_text(&comment, lexer.input).as_deref(),
            Some(" 你好世界")
        );
        let start = comment.span.start.offset;
        let end = comment.span.end.offset;
        assert_eq!(&source[start..end], "# 你好世界");
        assert!(source.is_char_boundary(start));
        assert!(source.is_char_boundary(end));
    }
6017
    #[test]
    fn test_heredoc_with_comments_inside() {
        // `#` lines inside the heredoc body are plain content; only the `#`
        // after the terminating delimiter lexes as a real Comment token.
        let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
        let mut lexer = Lexer::new(source);

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "# not a comment\nreal line\n");

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        assert_eq!(
            token_text(&comment, lexer.input).as_deref(),
            Some(" real comment")
        );
    }
6041
6042 #[test]
6043 fn test_heredoc_with_hash_in_variable() {
6044 let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6046 let mut lexer = Lexer::new(source);
6047
6048 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6049 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6050 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6051
6052 let heredoc = lexer.read_heredoc("EOF", false);
6053 assert_eq!(heredoc.content, "val=${x#prefix}\n");
6054 }
6055
    #[test]
    fn test_heredoc_span_does_not_leak() {
        // The heredoc's content_span must stay within the source and slice
        // exactly the body text; tokens after the delimiter must still lex.
        let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        let start = heredoc.content_span.start.offset;
        let end = heredoc.content_span.end.offset;
        assert!(
            end <= source.len(),
            "heredoc span end ({end}) exceeds source length ({})",
            source.len()
        );
        assert_eq!(&source[start..end], "hello\nworld\n");

        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
    }
6082
    #[test]
    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
        // Autoconf-style input: a quoted heredoc (`<<\_ACEOF`) whose body
        // contains an unbalanced backtick, followed by backtick command
        // substitutions. The lines after the delimiter must lex as words with
        // spans that still slice the original source correctly.
        let source = "\
cat <<\\_ACEOF
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
_ACEOF
ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
";
        let mut lexer = Lexer::new(source);

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
        // The escaped delimiter keeps its backslash in the token span.
        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(delimiter.kind, TokenKind::Word);
        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");

        let heredoc = lexer.read_heredoc("_ACEOF", false);
        assert_eq!(
            heredoc.content,
            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // First assignment word: a plain prefix plus one backtick substitution.
        let first = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(first.kind, TokenKind::Word);
        assert_eq!(
            first.span.slice(source),
            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
        );
        let first_segments = first
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            first_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_dir_suffix=/".to_string(),
                    Some("ac_dir_suffix=/".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
                ),
            ]
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // Second assignment word: same shape, different pipeline inside.
        let second = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(second.kind, TokenKind::Word);
        assert_eq!(
            second.span.slice(source),
            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
        );
        let second_segments = second
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            second_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_top_builddir_sub=".to_string(),
                    Some("ac_top_builddir_sub=".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
                    Some(
                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
                            .to_string(),
                    ),
                ),
            ]
        );
    }
6182
    #[test]
    fn test_heredoc_with_unicode_content() {
        // Heredoc content spans over multi-byte characters must land on
        // UTF-8 char boundaries and slice the source exactly.
        let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "# 你好\ncafé\n");
        let start = heredoc.content_span.start.offset;
        let end = heredoc.content_span.end.offset;
        assert!(
            source.is_char_boundary(start),
            "heredoc span start ({start}) not on char boundary"
        );
        assert!(
            source.is_char_boundary(end),
            "heredoc span end ({end}) not on char boundary"
        );
        assert_eq!(&source[start..end], "# 你好\ncafé\n");
    }
6207
6208 #[test]
6209 fn test_assoc_compound_assignment() {
6210 let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6213 assert_next_token(
6214 &mut lexer,
6215 TokenKind::Word,
6216 Some(r#"m=([foo]="bar" [baz]="qux")"#),
6217 );
6218 assert!(lexer.next_lexed_token().is_none());
6219 }
6220
6221 #[test]
6222 fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6223 let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6224 let mut lexer = Lexer::new(source);
6225
6226 let token = lexer.next_lexed_token().unwrap();
6227 assert_eq!(token.kind, TokenKind::Word);
6228 assert_eq!(token.span.slice(source), source);
6229 assert!(lexer.next_lexed_token().is_none());
6230 }
6231
6232 #[test]
6233 fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6234 let source = r#"foo\_bar@(baz|qux)"#;
6235 let mut lexer = Lexer::new(source);
6236
6237 let token = lexer.next_lexed_token().unwrap();
6238 assert_eq!(token.kind, TokenKind::Word);
6239 assert_eq!(token.span.slice(source), source);
6240 assert!(lexer.next_lexed_token().is_none());
6241 }
6242
6243 #[test]
6244 fn test_indexed_array_not_collapsed() {
6245 let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6248 assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6249 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6250 }
6251
    #[test]
    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
        // A quoted prefix, glob, and zsh `(:t)` glob qualifier must lex as one
        // word token with three distinct segments.
        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);

        // Segment breakdown: quoted part, glob part, qualifier part.
        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();
        assert_eq!(
            segments,
            vec![
                (
                    LexedWordSegmentKind::DoubleQuoted,
                    "$plugin_dir".to_string()
                ),
                (LexedWordSegmentKind::Plain, "/*".to_string()),
                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
            ]
        );

        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert!(lexer.next_lexed_token().is_none());
    }
6284
    #[test]
    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
        // A quoted variable directly followed by a zsh `(Nm-1)` qualifier must
        // lex as one word with two segments (no glob part in between).
        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();
        assert_eq!(
            segments,
            vec![
                (
                    LexedWordSegmentKind::DoubleQuoted,
                    "$__GREP_CACHE_FILE".to_string()
                ),
                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
            ]
        );

        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert!(lexer.next_lexed_token().is_none());
    }
6316
6317 #[test]
6318 fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6319 let source = r#"$dir/${~pats}(N)"#;
6320 let mut lexer = Lexer::new(source);
6321
6322 let token = lexer.next_lexed_token().unwrap();
6323 assert_eq!(token.kind, TokenKind::Word);
6324 assert_eq!(token.span.slice(source), source);
6325 assert!(lexer.next_lexed_token().is_none());
6326 }
6327
6328 #[test]
6329 fn test_dollar_word_does_not_absorb_function_parens() {
6330 let mut lexer = Lexer::new(r#"foo$x()"#);
6331
6332 assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6333 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6334 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6335 assert!(lexer.next_lexed_token().is_none());
6336 }
6337
6338 #[test]
6339 fn test_command_substitution_word_does_not_absorb_function_parens() {
6340 let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6341
6342 assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6343 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6344 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6345 assert!(lexer.next_lexed_token().is_none());
6346 }
6347
6348 #[test]
6351 fn test_digit_at_eof_no_panic() {
6352 let mut lexer = Lexer::new("2");
6354 let token = lexer.next_lexed_token();
6355 assert!(token.is_some());
6356 }
6357
6358 #[test]
6360 fn test_nested_brace_expansion_single_token() {
6361 let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6363 assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6364 assert!(lexer.next_lexed_token().is_none());
6366 }
6367
6368 #[test]
6370 fn test_simple_brace_expansion_unchanged() {
6371 let mut lexer = Lexer::new("${foo}");
6372 assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6373 assert!(lexer.next_lexed_token().is_none());
6374 }
6375
6376 #[test]
6377 fn test_nvm_fixture_lexes_without_stalling() {
6378 let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6379 let mut lexer = Lexer::new(input);
6380 let mut tokens = 0usize;
6381
6382 while lexer.next_lexed_token().is_some() {
6383 tokens += 1;
6384 assert!(
6385 tokens < 100_000,
6386 "lexer should continue making progress on the nvm fixture"
6387 );
6388 }
6389
6390 assert!(tokens > 0, "nvm fixture should produce at least one token");
6391 }
6392
    #[test]
    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
        // Case arms whose bodies contain `${var//' '/%20}` replacements with
        // escaped quotes must keep every non-newline token on one line, and
        // the `;;` terminators and `esac` keyword must still be produced.
        let input = concat!(
            "case \"${_input_type:-}\" in\n",
            " html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
            " org) _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
            "esac\n",
        );

        assert_non_newline_tokens_stay_on_one_line(input);

        let mut lexer = Lexer::new(input);
        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
            .map(|token| (token.kind, token_text(&token, input)))
            .collect::<Vec<_>>();
        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
    }
6411
6412 #[test]
6413 fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6414 let input = concat!(
6415 "case $2 in\n",
6416 " cygwin*) bin='cygwin32/bin' ;|\n",
6417 "esac\n",
6418 );
6419
6420 let mut lexer = Lexer::new(input);
6421 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6422 .map(|token| (token.kind, token_text(&token, input)))
6423 .collect::<Vec<_>>();
6424
6425 assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6426 assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6427 assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6428 }
6429
    #[test]
    fn test_inline_if_with_array_append_stays_line_local() {
        // `pyout+=(...)` and `pyout+="..."` appends inside an inline if/elif
        // must not drag any non-newline token across a line boundary.
        let input = concat!(
            "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
            "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
        );

        assert_non_newline_tokens_stay_on_one_line(input);
    }
6439
6440 #[test]
6441 fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6442 let source = "unsetopt interactive_comments\n#literal\n";
6443 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6444 let mut lexer = Lexer::with_profile(source, &profile);
6445
6446 assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6447 assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6448 assert_next_token(&mut lexer, TokenKind::Newline, None);
6449 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6450 }
6451
6452 #[test]
6453 fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6454 let source = "setopt rc_quotes\nprint 'a''b'\n";
6455 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6456 let mut lexer = Lexer::with_profile(source, &profile);
6457
6458 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6459 assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6460 assert_next_token(&mut lexer, TokenKind::Newline, None);
6461 assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6462 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6463 }
6464
6465 #[test]
6466 fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6467 let source = "setopt ignore_braces\n{ echo }\n";
6468 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6469 let mut lexer = Lexer::with_profile(source, &profile);
6470
6471 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6472 assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6473 assert_next_token(&mut lexer, TokenKind::Newline, None);
6474 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6475 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6476 assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6477 }
6478
    #[test]
    fn test_heredoc_in_arithmetic_fuzz_crash() {
        // Fuzzer-derived byte soup (valid UTF-8, mostly `((`, `<<E`, NULs and
        // filler) that once crashed the parser when wrapped in `$(( ... ))`.
        // The only requirement is that parsing terminates without panicking;
        // the byte array must stay exactly as the fuzzer produced it.
        let data: &[u8] = &[
            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
        ];
        let input = std::str::from_utf8(data).unwrap();
        let script = format!("echo $(({input}))\n");
        // The result is deliberately discarded — success means "did not panic".
        let _ = crate::parser::Parser::new(&script).parse();
    }
6510}