1use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact bit-set of per-token bookkeeping flags.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit set when some segment text was re-allocated ("cooked") rather
    /// than borrowed straight from the source.
    const COOKED_TEXT: u8 = 1 << 0;
    /// Bit set when the token was synthesized by the lexer rather than read
    /// directly from the input.
    const SYNTHETIC: u8 = 1 << 1;

    /// No flags set.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flags with only the cooked-text bit set.
    const fn cooked_text() -> Self {
        Self(Self::COOKED_TEXT)
    }

    /// Copy of `self` with the synthetic bit additionally set.
    pub(crate) const fn with_synthetic(self) -> Self {
        Self(self.0 | Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is set.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.0 & Self::COOKED_TEXT == Self::COOKED_TEXT
    }

    /// Whether the synthetic bit is set.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.0 & Self::SYNTHETIC == Self::SYNTHETIC
    }
}
40
/// Backing storage for a piece of token text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenText<'a> {
    /// Text borrowed directly from the lexer's input.
    Borrowed(&'a str),
    /// Text addressed as a byte range into a reference-counted copy of the
    /// source; clones are cheap (refcount bump, no copy).
    Shared {
        source: Arc<str>,
        range: Range<usize>,
    },
    /// Text that was re-allocated during lexing and is no longer backed by
    /// the source.
    Owned(String),
}
50
51impl TokenText<'_> {
52 pub(crate) fn as_str(&self) -> &str {
53 match self {
54 Self::Borrowed(text) => text,
55 Self::Shared { source, range } => &source[range.clone()],
56 Self::Owned(text) => text,
57 }
58 }
59
60 fn into_owned<'a>(self) -> TokenText<'a> {
61 match self {
62 Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63 Self::Shared { source, range } => TokenText::Shared { source, range },
64 Self::Owned(text) => TokenText::Owned(text),
65 }
66 }
67
68 fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69 match self {
70 Self::Borrowed(text) => span
71 .filter(|span| span.end.offset <= source.len())
72 .map_or_else(
73 || TokenText::Owned(text.to_string()),
74 |span| TokenText::Shared {
75 source: Arc::clone(source),
76 range: span.start.offset..span.end.offset,
77 },
78 ),
79 Self::Shared { source, range } => TokenText::Shared { source, range },
80 Self::Owned(text) => TokenText::Owned(text),
81 }
82 }
83}
84
/// Quoting style a word segment was lexed from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexedWordSegmentKind {
    /// Unquoted text.
    Plain,
    /// Text from a single-quoted region.
    SingleQuoted,
    /// Text from a `$'...'`-style region.
    DollarSingleQuoted,
    /// Text from a double-quoted region.
    DoubleQuoted,
    /// Text from a `$"..."`-style region.
    DollarDoubleQuoted,
    /// Segment that does not map to a single quoting style (used for
    /// non-word token kinds — see `LexedToken::word_segment_kind`).
    Composite,
}
101
/// One quoting-delimited piece of a lexed word.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWordSegment<'a> {
    // Quoting style this segment was lexed from.
    kind: LexedWordSegmentKind,
    // The segment's (possibly cooked) text.
    text: TokenText<'a>,
    // Span of the text itself, when source-backed.
    span: Option<Span>,
    // Span including any surrounding quote characters; `wrapper_span()`
    // falls back to `span` when this is absent.
    wrapper_span: Option<Span>,
}
110
111impl<'a> LexedWordSegment<'a> {
112 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113 Self {
114 kind,
115 text: TokenText::Borrowed(text),
116 span,
117 wrapper_span: span,
118 }
119 }
120
121 fn borrowed_with_spans(
122 kind: LexedWordSegmentKind,
123 text: &'a str,
124 span: Option<Span>,
125 wrapper_span: Option<Span>,
126 ) -> Self {
127 Self {
128 kind,
129 text: TokenText::Borrowed(text),
130 span,
131 wrapper_span,
132 }
133 }
134
135 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136 Self {
137 kind,
138 text: TokenText::Owned(text),
139 span: None,
140 wrapper_span: None,
141 }
142 }
143
144 fn owned_with_spans(
145 kind: LexedWordSegmentKind,
146 text: String,
147 span: Option<Span>,
148 wrapper_span: Option<Span>,
149 ) -> Self {
150 Self {
151 kind,
152 text: TokenText::Owned(text),
153 span,
154 wrapper_span,
155 }
156 }
157
158 pub fn as_str(&self) -> &str {
160 self.text.as_str()
161 }
162
163 pub(crate) const fn text_is_source_backed(&self) -> bool {
164 matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165 }
166
167 pub const fn kind(&self) -> LexedWordSegmentKind {
169 self.kind
170 }
171
172 pub const fn span(&self) -> Option<Span> {
174 self.span
175 }
176
177 pub fn wrapper_span(&self) -> Option<Span> {
179 self.wrapper_span.or(self.span)
180 }
181
182 fn rebased(mut self, base: Position) -> Self {
183 self.span = self.span.map(|span| span.rebased(base));
184 self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185 self
186 }
187
188 fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189 LexedWordSegment {
190 kind: self.kind,
191 text: self.text.into_owned(),
192 span: self.span,
193 wrapper_span: self.wrapper_span,
194 }
195 }
196
197 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198 LexedWordSegment {
199 kind: self.kind,
200 text: self.text.into_shared(source, self.span),
201 span: self.span,
202 wrapper_span: self.wrapper_span,
203 }
204 }
205}
206
/// The payload of a word-like token: a primary segment plus any adjoining
/// trailing segments pushed during lexing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWord<'a> {
    // First segment of the word.
    primary_segment: LexedWordSegment<'a>,
    // Additional segments, in the order they were pushed.
    trailing_segments: Vec<LexedWordSegment<'a>>,
}
213
214impl<'a> LexedWord<'a> {
215 fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216 Self {
217 primary_segment,
218 trailing_segments: Vec::new(),
219 }
220 }
221
222 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223 Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224 }
225
226 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227 Self::from_segment(LexedWordSegment::owned(kind, text))
228 }
229
230 fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231 self.trailing_segments.push(segment);
232 }
233
234 pub fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236 std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237 }
238
239 pub fn text(&self) -> Option<&str> {
241 self.single_segment().map(LexedWordSegment::as_str)
242 }
243
244 pub fn joined_text(&self) -> String {
246 let mut text = String::new();
247 for segment in self.segments() {
248 text.push_str(segment.as_str());
249 }
250 text
251 }
252
253 pub fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255 self.trailing_segments
256 .is_empty()
257 .then_some(&self.primary_segment)
258 }
259
260 fn has_cooked_text(&self) -> bool {
261 self.segments()
262 .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263 }
264
265 fn rebased(mut self, base: Position) -> Self {
266 self.primary_segment = self.primary_segment.rebased(base);
267 self.trailing_segments = self
268 .trailing_segments
269 .into_iter()
270 .map(|segment| segment.rebased(base))
271 .collect();
272 self
273 }
274
275 fn into_owned<'b>(self) -> LexedWord<'b> {
276 LexedWord {
277 primary_segment: self.primary_segment.into_owned(),
278 trailing_segments: self
279 .trailing_segments
280 .into_iter()
281 .map(LexedWordSegment::into_owned)
282 .collect(),
283 }
284 }
285
286 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287 LexedWord {
288 primary_segment: self.primary_segment.into_shared(source),
289 trailing_segments: self
290 .trailing_segments
291 .into_iter()
292 .map(|segment| segment.into_shared(source))
293 .collect(),
294 }
295 }
296}
297
/// Categories of lexical error surfaced as `TokenKind::Error` tokens.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    /// A command substitution that was never closed.
    CommandSubstitution,
    /// A backtick substitution that was never closed.
    BacktickSubstitution,
    /// A single-quoted string that was never closed.
    SingleQuote,
    /// A double-quoted string that was never closed.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Human-readable diagnostic message for this error category.
    pub const fn message(self) -> &'static str {
        match self {
            Self::SingleQuote => "unterminated single quote",
            Self::DoubleQuote => "unterminated double quote",
            Self::CommandSubstitution => "unterminated command substitution",
            Self::BacktickSubstitution => "unterminated backtick substitution",
        }
    }
}
322
/// Kind-specific data attached to a `LexedToken`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// Punctuation/operator tokens carry no payload.
    None,
    /// Word-like tokens carry their segmented text.
    Word(LexedWord<'a>),
    /// Fd-redirect tokens carry the descriptor number.
    Fd(i32),
    /// Fd-pair redirect tokens carry (source, destination) descriptors.
    FdPair(i32, i32),
    /// Error tokens record why lexing failed.
    Error(LexerErrorKind),
}
331
/// A single token produced by the lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedToken<'a> {
    /// The token's kind.
    pub kind: TokenKind,
    /// Source span covering the token.
    pub span: Span,
    /// Bookkeeping flags (cooked text, synthetic token).
    pub(crate) flags: TokenFlags,
    // Kind-specific payload; accessed through `word()`, `fd_value()`, etc.
    payload: TokenPayload<'a>,
}
342
343impl<'a> LexedToken<'a> {
344 fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
345 match kind {
346 TokenKind::Word => LexedWordSegmentKind::Plain,
347 TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
348 TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
349 _ => LexedWordSegmentKind::Composite,
350 }
351 }
352
353 pub(crate) fn punctuation(kind: TokenKind) -> Self {
354 Self {
355 kind,
356 span: Span::new(),
357 flags: TokenFlags::empty(),
358 payload: TokenPayload::None,
359 }
360 }
361
362 fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
363 let flags = if word.has_cooked_text() {
364 TokenFlags::cooked_text()
365 } else {
366 TokenFlags::empty()
367 };
368
369 Self {
370 kind,
371 span: Span::new(),
372 flags,
373 payload: TokenPayload::Word(word),
374 }
375 }
376
377 fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
378 Self::with_word_payload(
379 kind,
380 LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
381 )
382 }
383
384 fn owned_word(kind: TokenKind, text: String) -> Self {
385 Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
386 }
387
388 fn comment() -> Self {
389 Self {
390 kind: TokenKind::Comment,
391 span: Span::new(),
392 flags: TokenFlags::empty(),
393 payload: TokenPayload::None,
394 }
395 }
396
397 fn fd(kind: TokenKind, fd: i32) -> Self {
398 Self {
399 kind,
400 span: Span::new(),
401 flags: TokenFlags::empty(),
402 payload: TokenPayload::Fd(fd),
403 }
404 }
405
406 fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
407 Self {
408 kind,
409 span: Span::new(),
410 flags: TokenFlags::empty(),
411 payload: TokenPayload::FdPair(src_fd, dst_fd),
412 }
413 }
414
415 fn error(kind: LexerErrorKind) -> Self {
416 Self {
417 kind: TokenKind::Error,
418 span: Span::new(),
419 flags: TokenFlags::empty(),
420 payload: TokenPayload::Error(kind),
421 }
422 }
423
424 pub(crate) fn with_span(mut self, span: Span) -> Self {
425 self.span = span;
426 self
427 }
428
429 pub(crate) fn rebased(mut self, base: Position) -> Self {
430 self.span = self.span.rebased(base);
431 self.payload = match self.payload {
432 TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
433 payload => payload,
434 };
435 self
436 }
437
438 pub(crate) fn with_synthetic_flag(mut self) -> Self {
439 self.flags = self.flags.with_synthetic();
440 self
441 }
442
443 pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
444 let payload = match self.payload {
445 TokenPayload::None => TokenPayload::None,
446 TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
447 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
448 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
449 TokenPayload::Error(kind) => TokenPayload::Error(kind),
450 };
451
452 LexedToken {
453 kind: self.kind,
454 span: self.span,
455 flags: self.flags,
456 payload,
457 }
458 }
459
460 pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
461 let payload = match self.payload {
462 TokenPayload::None => TokenPayload::None,
463 TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
464 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
465 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
466 TokenPayload::Error(kind) => TokenPayload::Error(kind),
467 };
468
469 LexedToken {
470 kind: self.kind,
471 span: self.span,
472 flags: self.flags,
473 payload,
474 }
475 }
476
477 pub fn word_text(&self) -> Option<&str> {
479 self.kind
480 .is_word_like()
481 .then_some(())
482 .and_then(|_| match &self.payload {
483 TokenPayload::Word(word) => word.text(),
484 _ => None,
485 })
486 }
487
488 pub fn word_string(&self) -> Option<String> {
490 self.kind
491 .is_word_like()
492 .then_some(())
493 .and_then(|_| match &self.payload {
494 TokenPayload::Word(word) => Some(word.joined_text()),
495 _ => None,
496 })
497 }
498
499 pub fn word(&self) -> Option<&LexedWord<'a>> {
501 match &self.payload {
502 TokenPayload::Word(word) => Some(word),
503 _ => None,
504 }
505 }
506
507 pub fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
509 if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
510 return None;
511 }
512
513 (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
514 .then(|| &source[self.span.start.offset..self.span.end.offset])
515 }
516
517 pub fn fd_value(&self) -> Option<i32> {
519 match self.payload {
520 TokenPayload::Fd(fd) => Some(fd),
521 _ => None,
522 }
523 }
524
525 pub fn fd_pair_value(&self) -> Option<(i32, i32)> {
527 match self.payload {
528 TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
529 _ => None,
530 }
531 }
532
533 pub fn error_kind(&self) -> Option<LexerErrorKind> {
535 match self.payload {
536 TokenPayload::Error(kind) => Some(kind),
537 _ => None,
538 }
539 }
540}
541
/// The body of a here-document as read by the lexer.
#[derive(Debug, Clone, PartialEq)]
pub struct HeredocRead {
    /// The here-document's content text.
    pub content: String,
    /// Source span covering the content.
    pub content_span: Span,
}
550
/// Default limit on substitution nesting depth (see `Lexer::max_subst_depth`).
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
554
/// Lightweight scanning cursor over the unread tail of the input.
#[derive(Clone, Debug)]
struct Cursor<'a> {
    // The not-yet-consumed suffix of the original source.
    rest: &'a str,
}
559
560impl<'a> Cursor<'a> {
561 fn new(source: &'a str) -> Self {
562 Self { rest: source }
563 }
564
565 fn first(&self) -> Option<char> {
566 self.rest.chars().next()
567 }
568
569 fn second(&self) -> Option<char> {
570 let mut chars = self.rest.chars();
571 chars.next()?;
572 chars.next()
573 }
574
575 fn third(&self) -> Option<char> {
576 let mut chars = self.rest.chars();
577 chars.next()?;
578 chars.next()?;
579 chars.next()
580 }
581
582 fn bump(&mut self) -> Option<char> {
583 let ch = self.first()?;
584 self.rest = &self.rest[ch.len_utf8()..];
585 Some(ch)
586 }
587
588 fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
589 let start = self.rest;
590 let mut end = 0;
591
592 for ch in start.chars() {
593 if !predicate(ch) {
594 break;
595 }
596 end += ch.len_utf8();
597 }
598
599 self.rest = &start[end..];
600 &start[..end]
601 }
602
603 fn rest(&self) -> &'a str {
604 self.rest
605 }
606
607 fn skip_bytes(&mut self, count: usize) {
608 self.rest = &self.rest[count..];
609 }
610
611 fn find_byte(&self, byte: u8) -> Option<usize> {
612 memchr(byte, self.rest.as_bytes())
613 }
614}
615
/// Translates byte offsets into line/column `Position`s.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    source: &'a str,
    // Byte offset of the start of each line; index 0 is always offset 0.
    line_starts: Arc<[usize]>,
    // Most recently computed position; forward queries advance from here
    // incrementally instead of rescanning from the line start.
    cached: Position,
}
622
/// Counters collected when the `benchmarking` feature is enabled.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    // Number of times `Lexer::current_position` was invoked.
    pub(crate) current_position_calls: u64,
}
628
629impl<'a> PositionMap<'a> {
630 fn new(source: &'a str) -> Self {
631 let mut line_starts =
632 Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
633 line_starts.push(0);
634 line_starts.extend(
635 source
636 .bytes()
637 .enumerate()
638 .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
639 );
640
641 Self {
642 source,
643 line_starts: line_starts.into(),
644 cached: Position::new(),
645 }
646 }
647
648 fn position(&mut self, offset: usize) -> Position {
649 if offset == self.cached.offset {
650 return self.cached;
651 }
652
653 let position = if offset > self.cached.offset && offset <= self.source.len() {
654 Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
655 } else {
656 self.position_uncached(offset)
657 };
658 self.cached = position;
659 position
660 }
661
662 fn position_uncached(&self, offset: usize) -> Position {
663 let offset = offset.min(self.source.len());
664 let line_index = self
665 .line_starts
666 .partition_point(|start| *start <= offset)
667 .saturating_sub(1);
668 let line_start = self.line_starts[line_index];
669 let line_text = &self.source[line_start..offset];
670 let column = if line_text.is_ascii() {
671 line_text.len() + 1
672 } else {
673 line_text.chars().count() + 1
674 };
675
676 Position {
677 line: line_index + 1,
678 column,
679 offset,
680 }
681 }
682
683 fn advance_from(mut position: Position, text: &str) -> Position {
684 position.offset += text.len();
685 let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
686 if newline_count == 0 {
687 position.column += if text.is_ascii() {
688 text.len()
689 } else {
690 text.chars().count()
691 };
692 return position;
693 }
694
695 position.line += newline_count;
696 let tail_start = memrchr(b'\n', text.as_bytes())
697 .map(|index| index + 1)
698 .unwrap_or_default();
699 let tail = &text[tail_start..];
700 position.column = if tail.is_ascii() {
701 tail.len() + 1
702 } else {
703 tail.chars().count() + 1
704 };
705 position
706 }
707}
708
/// Shell lexer producing `LexedToken`s from `input`.
#[derive(Clone)]
pub struct Lexer<'a> {
    #[allow(dead_code)] input: &'a str,
    // Byte offset of the next character to produce. Reinjected characters
    // advance this as well, even though they do not move `cursor`.
    offset: usize,
    // Scanning cursor over the unread source.
    cursor: Cursor<'a>,
    // Offset -> line/column translation with forward caching.
    position_map: PositionMap<'a>,
    // Characters pushed back for re-lexing; drained before `cursor`.
    reinject_buf: VecDeque<char>,
    // Offset to restore once `reinject_buf` drains (see `sync_offset_to_cursor`).
    reinject_resume_offset: Option<usize>,
    // Maximum allowed nesting depth for substitutions.
    max_subst_depth: usize,
    // Zsh option state in effect at the start of input, if lexing zsh.
    initial_zsh_options: Option<ZshOptionState>,
    // Precomputed per-offset zsh option changes (zsh dialect only).
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    // Index of the next timeline entry not yet applied.
    zsh_timeline_index: usize,
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
731
732impl<'a> Lexer<'a> {
    /// Creates a lexer with default settings: bash dialect and the default
    /// substitution-depth limit.
    pub fn new(input: &'a str) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }

    /// Creates a bash-dialect lexer with a custom substitution-depth limit.
    pub fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            max_depth,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }

    /// Creates a lexer for the given shell profile. For the zsh dialect, a
    /// timeline of option changes is precomputed from `input` so that
    /// option-sensitive decisions can consult the state in effect at each
    /// offset (see `current_zsh_options`).
    pub fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
        let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
            .then(|| ZshOptionTimeline::build(input, shell_profile))
            .flatten()
            .map(Arc::new);
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            shell_profile,
            zsh_timeline,
        )
    }

    /// Shared constructor that all other constructors funnel through.
    pub(crate) fn with_max_subst_depth_and_profile(
        input: &'a str,
        max_depth: usize,
        shell_profile: &ShellProfile,
        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    ) -> Self {
        Self {
            input,
            offset: 0,
            cursor: Cursor::new(input),
            position_map: PositionMap::new(input),
            reinject_buf: VecDeque::new(),
            reinject_resume_offset: None,
            max_subst_depth: max_depth,
            initial_zsh_options: shell_profile.zsh_options().cloned(),
            zsh_timeline,
            zsh_timeline_index: 0,
            #[cfg(feature = "benchmarking")]
            benchmark_counters: None,
        }
    }
789
    /// Current position, computed without touching the forward cache.
    pub fn position(&self) -> Position {
        self.position_map.position_uncached(self.offset)
    }

    /// Position of an arbitrary byte `offset` (uncached).
    pub(super) fn position_at_offset(&self, offset: usize) -> Position {
        self.position_map.position_uncached(offset)
    }

    /// Current position via the caching path; used on the hot lexing path.
    fn current_position(&mut self) -> Position {
        #[cfg(feature = "benchmarking")]
        self.maybe_record_current_position_call();
        self.position_map.position(self.offset)
    }

    /// Starts collecting benchmark counters.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn enable_benchmark_counters(&mut self) {
        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
    }

    /// Returns the collected counters (zeroed when never enabled).
    #[cfg(feature = "benchmarking")]
    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
        self.benchmark_counters.unwrap_or_default()
    }

    // Bumps the `current_position` call counter when counting is enabled.
    #[cfg(feature = "benchmarking")]
    fn maybe_record_current_position_call(&mut self) {
        if let Some(counters) = &mut self.benchmark_counters {
            counters.current_position_calls += 1;
        }
    }
821
    /// Once the reinjection buffer has fully drained, restores `offset` to
    /// the source offset recorded when reinjection began, re-aligning it
    /// with `cursor`.
    fn sync_offset_to_cursor(&mut self) {
        if self.reinject_buf.is_empty()
            && let Some(offset) = self.reinject_resume_offset.take()
        {
            self.offset = offset;
        }
    }

    /// Convenience wrapper that returns only the next token's kind.
    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
        self.next_lexed_token().map(|token| token.kind)
    }

    /// Peeks the next character, preferring reinjected characters over the
    /// source cursor.
    fn peek_char(&mut self) -> Option<char> {
        self.sync_offset_to_cursor();
        if let Some(&ch) = self.reinject_buf.front() {
            Some(ch)
        } else {
            self.cursor.first()
        }
    }
844
    /// Consumes the next character (reinjected characters first, then the
    /// source) and advances `offset` by its UTF-8 length.
    fn advance(&mut self) -> Option<char> {
        self.sync_offset_to_cursor();
        let ch = if !self.reinject_buf.is_empty() {
            self.reinject_buf.pop_front()
        } else {
            self.cursor.bump()
        };
        if let Some(c) = ch {
            self.offset += c.len_utf8();
        }
        ch
    }

    /// All upcoming characters: reinjected characters first, then the
    /// unread source.
    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
        self.reinject_buf
            .iter()
            .copied()
            .chain(self.cursor.rest().chars())
    }

    /// Second upcoming character, accounting for the reinjection buffer:
    /// the first `reinject_buf.len()` lookahead slots come from the buffer,
    /// the rest from the cursor.
    fn second_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.second(),
            1 => self.cursor.first(),
            _ => self.reinject_buf.get(1).copied(),
        }
    }

    /// Third upcoming character (same buffering rules as `second_char`).
    fn third_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.third(),
            1 => self.cursor.second(),
            2 => self.cursor.first(),
            _ => self.reinject_buf.get(2).copied(),
        }
    }

    /// Fourth upcoming character (same buffering rules as `second_char`).
    fn fourth_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.rest().chars().nth(3),
            1 => self.cursor.third(),
            2 => self.cursor.second(),
            3 => self.cursor.first(),
            _ => self.reinject_buf.get(3).copied(),
        }
    }
891
    /// Advances past `byte_len` source bytes. Only valid while no
    /// characters are reinjected (asserted in debug builds).
    fn consume_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.sync_offset_to_cursor();
        self.offset += byte_len;
        self.cursor.skip_bytes(byte_len);
    }

    /// Advances `offset` past bytes the cursor has already moved over.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }

    /// Consumes `count` ASCII characters, taking the byte fast path when no
    /// characters are reinjected.
    fn consume_ascii_chars(&mut self, count: usize) {
        if self.reinject_buf.is_empty() {
            self.consume_source_bytes(count);
            return;
        }

        for _ in 0..count {
            self.advance();
        }
    }

    /// Length in bytes of the run of spaces/tabs at the cursor.
    fn source_horizontal_whitespace_len(&self) -> usize {
        self.cursor
            .rest()
            .as_bytes()
            .iter()
            .take_while(|byte| matches!(**byte, b' ' | b'\t'))
            .count()
    }

    /// Length in bytes of the run of plain-word ASCII bytes at the cursor.
    fn source_ascii_plain_word_len(&self) -> usize {
        self.cursor
            .rest()
            .as_bytes()
            .iter()
            .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
            .count()
    }

    /// Index of the first byte that is special inside double quotes:
    /// `"`, `\`, `$`, or a backtick.
    fn find_double_quote_special(source: &str) -> Option<usize> {
        source
            .as_bytes()
            .iter()
            .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
    }

    /// Lazily starts `capture` with the source text between `start` and
    /// `end` if no capture is active yet.
    fn ensure_capture_from_source(
        &self,
        capture: &mut Option<String>,
        start: Position,
        end: Position,
    ) {
        if capture.is_none() {
            *capture = Some(self.input[start.offset..end.offset].to_string());
        }
    }

    /// Appends `ch` to the capture, if one is active.
    fn push_capture_char(capture: &mut Option<String>, ch: char) {
        if let Some(text) = capture.as_mut() {
            text.push(ch);
        }
    }

    /// Appends `text` to the capture, if one is active.
    fn push_capture_str(capture: &mut Option<String>, text: &str) {
        if let Some(current) = capture.as_mut() {
            current.push_str(text);
        }
    }
962
    /// Zsh option state in effect at the current offset: walks the
    /// precomputed timeline forward (the index only ever advances, since
    /// lexing is monotonic) and falls back to the initial options before
    /// the first timeline entry applies.
    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
        if let Some(timeline) = self.zsh_timeline.as_ref() {
            while self.zsh_timeline_index < timeline.entries.len()
                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
            {
                self.zsh_timeline_index += 1;
            }
            return if self.zsh_timeline_index == 0 {
                self.initial_zsh_options.as_ref()
            } else {
                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
            };
        }

        self.initial_zsh_options.as_ref()
    }

    /// `#` comments are recognized unless the interactive-comments option
    /// is known to be off.
    fn comments_enabled(&mut self) -> bool {
        !self
            .current_zsh_options()
            .is_some_and(|options| options.interactive_comments.is_definitely_off())
    }

    /// True when the zsh rc-quotes option is known to be on.
    fn rc_quotes_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.rc_quotes.is_definitely_on())
    }

    /// True when the zsh ignore-braces option is known to be on.
    fn ignore_braces_enabled(&mut self) -> bool {
        self.current_zsh_options()
            .is_some_and(|options| options.ignore_braces.is_definitely_on())
    }

    /// True when either ignore-braces or ignore-close-braces is known on.
    fn ignore_close_braces_enabled(&mut self) -> bool {
        self.current_zsh_options().is_some_and(|options| {
            options.ignore_braces.is_definitely_on()
                || options.ignore_close_braces.is_definitely_on()
        })
    }

    /// A `#` starts a word rather than a comment when comments are
    /// disabled, or when it directly follows a non-whitespace,
    /// non-operator character, or sits inside an unclosed `((` on the
    /// current line.
    fn should_treat_hash_as_word_char(&mut self) -> bool {
        if !self.comments_enabled() {
            return true;
        }
        self.reinject_buf.is_empty()
            && (self
                .input
                .get(..self.offset)
                .and_then(|prefix| prefix.chars().next_back())
                .is_some_and(|prev| {
                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
                })
                || self.is_inside_unclosed_double_paren_on_line())
    }
1017
    /// Surface text of the word being lexed: the cooked capture if one is
    /// active, otherwise the raw source slice from `start` to the current
    /// offset.
    fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
        capture
            .as_deref()
            .unwrap_or(&self.input[start.offset..self.offset])
    }

    /// True when the word's surface text is exactly the single character
    /// `target`. NUL characters are skipped when comparing — they appear
    /// to serve as internal placeholders in cooked text (TODO confirm).
    fn current_word_surface_is_single_char(
        &self,
        start: Position,
        capture: &Option<String>,
        target: char,
    ) -> bool {
        let text = self.current_word_text(start, capture);
        // Fast path: no NUL placeholders present, compare the text directly.
        if !text.contains('\x00') {
            let mut encoded = [0; 4];
            return text == target.encode_utf8(&mut encoded);
        }

        // Slow path: exactly one non-NUL character, and it equals `target`.
        let mut chars = text.chars().filter(|&ch| ch != '\x00');
        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
    }

    /// Last non-NUL character of the word's surface text.
    fn current_word_surface_last_char<'b>(
        &'b self,
        start: Position,
        capture: &'b Option<String>,
    ) -> Option<char> {
        self.current_word_text(start, capture)
            .chars()
            .rev()
            .find(|&ch| ch != '\x00')
    }

    /// True when the surface text ends with `target` (ignoring NULs).
    fn current_word_surface_ends_with_char(
        &self,
        start: Position,
        capture: &Option<String>,
        target: char,
    ) -> bool {
        self.current_word_surface_last_char(start, capture) == Some(target)
    }

    /// True when the surface text ends with an extglob prefix character:
    /// `@`, `?`, `*`, `+`, or `!`.
    fn current_word_surface_ends_with_extglob_prefix(
        &self,
        start: Position,
        capture: &Option<String>,
    ) -> bool {
        self.current_word_surface_last_char(start, capture)
            .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
    }
1068
1069 pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1071 self.skip_whitespace();
1072 let start = self.current_position();
1073 let token = self.next_lexed_token_inner(false)?;
1074 let end = self.current_position();
1075 Some(token.with_span(Span::from_positions(start, end)))
1076 }
1077
1078 pub fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1080 self.skip_whitespace();
1081 let start = self.current_position();
1082 let token = self.next_lexed_token_inner(true)?;
1083 let end = self.current_position();
1084 Some(token.with_span(Span::from_positions(start, end)))
1085 }
1086
    /// Core token dispatch: inspects the next character and produces one
    /// token, preferring the longest matching operator. Word-like starts
    /// delegate to the various `read_*` helpers.
    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
        let ch = self.peek_char()?;

        match ch {
            // Newline is its own token.
            '\n' => {
                self.consume_ascii_chars(1);
                Some(LexedToken::punctuation(TokenKind::Newline))
            }
            // `;;&`, `;;`, `;|`, `;&`, `;` — longest match first.
            ';' => {
                if self.second_char() == Some(';') {
                    if self.third_char() == Some('&') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiPipe)) } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiAmp)) } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Semicolon))
                }
            }
            // `||`, `|&`, `|`.
            '|' => {
                if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Or))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Pipe))
                }
            }
            // `&&`, `&>>`, `&>`, `&|`, `&!`, `&`.
            '&' => {
                if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::And))
                } else if self.second_char() == Some('>') {
                    if self.third_char() == Some('>') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
                } else if self.second_char() == Some('!') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Background))
                }
            }
            // `>>|`/`>>` (both RedirectAppend), `>|`, `>(`, `>&`, `>`.
            '>' => {
                if self.second_char() == Some('>') {
                    if self.third_char() == Some('|') {
                        self.consume_ascii_chars(3);
                    } else {
                        self.consume_ascii_chars(2);
                    }
                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Clobber))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupOutput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
                }
            }
            // `<<<`, `<<-`, `<<`, `<>`, `<(`, `<&`, `<`.
            '<' => {
                if self.second_char() == Some('<') {
                    if self.third_char() == Some('<') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereString))
                    } else if self.third_char() == Some('-') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::HereDoc))
                    }
                } else if self.second_char() == Some('>') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupInput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
                }
            }
            '(' => {
                if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::LeftParen))
                }
            }
            ')' => {
                if self.second_char() == Some(')') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RightParen))
                }
            }
            // `{`: literal word (zsh ignore-braces), brace expansion, brace
            // group, or a brace-literal word, depending on lookahead.
            '{' => {
                let start = self.current_position();
                if self.ignore_braces_enabled() {
                    self.consume_ascii_chars(1);
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
                        }
                        _ => self.read_word_starting_with("{", start),
                    }
                } else if self.looks_like_brace_expansion() {
                    self.read_brace_expansion_word()
                } else if self.is_brace_group_start() {
                    self.advance();
                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
                } else if self.brace_literal_starts_case_pattern_delimiter() {
                    self.read_word_starting_with("{", start)
                } else {
                    self.read_brace_literal_word()
                }
            }
            // `}`: a word under zsh ignore-close-braces, else punctuation.
            '}' => {
                self.consume_ascii_chars(1);
                if self.ignore_close_braces_enabled() {
                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
                } else {
                    Some(LexedToken::punctuation(TokenKind::RightBrace))
                }
            }
            // `[[` only when followed by whitespace/EOF; otherwise `[` is a
            // bare word or the start of a longer word.
            '[' => {
                let start = self.current_position();
                self.consume_ascii_chars(1);
                if self.peek_char() == Some('[')
                    && matches!(
                        self.second_char(),
                        Some(' ') | Some('\t') | Some('\n') | None
                    )
                {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
                } else {
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
                        }
                        _ => self.read_word_starting_with("[", start),
                    }
                }
            }
            ']' => {
                if self.second_char() == Some(']') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
                }
            }
            '\'' => self.read_single_quoted_string(),
            '"' => self.read_double_quoted_string(),
            // `#` may start a word instead of a comment (see
            // `should_treat_hash_as_word_char`).
            '#' => {
                if self.should_treat_hash_as_word_char() {
                    let start = self.current_position();
                    return self.read_word_starting_with("#", start);
                }
                if preserve_comments {
                    self.read_comment();
                    Some(LexedToken::comment())
                } else {
                    self.skip_comment();
                    self.next_lexed_token_inner(false)
                }
            }
            // A leading digit may be an fd-prefixed redirect (e.g. `2>`).
            '0'..='9' => self.read_word_or_fd_redirect(),
            _ => self.read_word(),
        }
    }
1304
1305 fn skip_whitespace(&mut self) {
1306 while let Some(ch) = self.peek_char() {
1307 if self.reinject_buf.is_empty() {
1308 let whitespace_len = self.source_horizontal_whitespace_len();
1309 if whitespace_len > 0 {
1310 self.consume_source_bytes(whitespace_len);
1311 continue;
1312 }
1313
1314 if self.cursor.rest().starts_with("\\\n") {
1315 self.consume_source_bytes(2);
1316 continue;
1317 }
1318 }
1319
1320 if ch == ' ' || ch == '\t' {
1321 self.consume_ascii_chars(1);
1322 } else if ch == '\\' {
1323 if self.second_char() == Some('\n') {
1325 self.consume_ascii_chars(2);
1326 } else {
1327 break;
1328 }
1329 } else {
1330 break;
1331 }
1332 }
1333 }
1334
1335 fn skip_comment(&mut self) {
1336 if self.reinject_buf.is_empty() {
1337 let end = self
1338 .cursor
1339 .find_byte(b'\n')
1340 .unwrap_or(self.cursor.rest().len());
1341 self.consume_source_bytes(end);
1342 return;
1343 }
1344
1345 while let Some(ch) = self.peek_char() {
1346 if ch == '\n' {
1347 break;
1348 }
1349 self.advance();
1350 }
1351 }
1352
1353 fn read_comment(&mut self) {
1354 debug_assert_eq!(self.peek_char(), Some('#'));
1355
1356 if self.reinject_buf.is_empty() {
1357 let rest = self.cursor.rest();
1358 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1359 self.consume_source_bytes(end);
1360 return;
1361 }
1362
1363 self.advance(); while let Some(ch) = self.peek_char() {
1366 if ch == '\n' {
1367 break;
1368 }
1369 self.advance();
1370 }
1371 }
1372
1373 fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1374 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1375 return false;
1376 }
1377
1378 let line_start = self.input[..self.offset]
1379 .rfind('\n')
1380 .map_or(0, |index| index + 1);
1381 let prefix = &self.input[line_start..self.offset];
1382 line_has_unclosed_double_paren(prefix)
1383 }
1384
    /// Lexes a token that begins with an ASCII digit: either a
    /// fd-prefixed redirection (`2>`, `2>>`, `1>&2`, `0<&-`, `3<>`, …) or an
    /// ordinary word that merely starts with digits.
    ///
    /// Nothing is consumed until a redirect shape is confirmed by lookahead,
    /// so falling through to `read_word` sees the original input.
    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
            let Some(fd) = first_digit.to_digit(10) else {
                unreachable!("peeked ASCII digit should convert to a base-10 digit");
            };
            let fd = fd as i32;

            // Classify on two (sometimes three) characters of lookahead.
            match (self.second_char(), self.third_char()) {
                (Some('>'), Some('>')) => {
                    // `N>>` append; `N>>|` consumes the `|` as well.
                    if self.fourth_char() == Some('|') {
                        self.consume_ascii_chars(4);
                    } else {
                        self.consume_ascii_chars(3);
                    }
                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
                }
                (Some('>'), Some('|')) => {
                    // `N>|` forced overwrite (clobber).
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
                }
                (Some('>'), Some('&')) => {
                    // `N>&M` duplicates output fd N onto M.
                    self.consume_ascii_chars(3);

                    // Collect the target fd digits following `>&`.
                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() {
                            target_str.push(c);
                            self.advance();
                        } else {
                            break;
                        }
                    }

                    // Bare `N>&` (no digits) degrades to a plain fd redirect.
                    if target_str.is_empty() {
                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                    }

                    // NOTE(review): a target too large for i32 fails to parse
                    // and silently falls back to 1 — confirm this is intended.
                    let target_fd: i32 = target_str.parse().unwrap_or(1);
                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
                }
                (Some('>'), _) => {
                    // Plain `N>`.
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                }
                (Some('<'), Some('&')) => {
                    // `N<&M` duplicates input fd; `N<&-` closes fd N.
                    self.consume_ascii_chars(3);

                    // Digits, optionally terminated by a single `-`.
                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() || c == '-' {
                            target_str.push(c);
                            self.advance();
                            if c == '-' {
                                break;
                            }
                        } else {
                            break;
                        }
                    }

                    if target_str == "-" {
                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
                    }
                    // NOTE(review): the move form `N<&M-` ends up with
                    // target_str "M-" which fails to parse and falls back to
                    // 0 — verify against the intended shell semantics.
                    let target_fd: i32 = target_str.parse().unwrap_or(0);
                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
                }
                (Some('<'), Some('>')) => {
                    // `N<>` opens fd N read-write.
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
                }
                // `N<<` begins a heredoc, which is not handled here; fall
                // through to the word path without consuming anything.
                (Some('<'), Some('<')) => {}
                (Some('<'), _) => {
                    // Plain `N<`.
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
                }
                _ => {}
            }
        }

        // Not a redirect shape: lex as an ordinary word.
        self.read_word()
    }
1469
1470 fn read_word_starting_with(
1471 &mut self,
1472 _prefix: &str,
1473 start: Position,
1474 ) -> Option<LexedToken<'a>> {
1475 let segment = match self.read_unquoted_segment(start) {
1476 Ok(segment) => segment,
1477 Err(kind) => return Some(LexedToken::error(kind)),
1478 };
1479 if segment.as_str().is_empty() {
1480 return None;
1481 }
1482 let mut lexed_word = LexedWord::from_segment(segment);
1483 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1484 return Some(LexedToken::error(kind));
1485 }
1486 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1487 }
1488
    /// Lexes a word token starting at the current position.
    ///
    /// Fast path (source text only, nothing reinjected): grab the longest
    /// run of plain word characters in one go. If nothing follows that run
    /// which could extend the word, it is emitted directly as a borrowed
    /// `Word` token; otherwise the run becomes the first segment and lexing
    /// continues on the slower segmented path.
    fn read_word(&mut self) -> Option<LexedToken<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            let ascii_len = self.source_ascii_plain_word_len();
            // Trust the byte-wise ASCII scan only when the byte that ends the
            // run is itself ASCII; otherwise fall back to the char-wise
            // scanner so multi-byte characters are classified correctly.
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if !chunk.is_empty() {
                // The word continues if the next char could glue another
                // segment onto it: more word chars, an expansion (`$`),
                // quotes, a brace, a line continuation, or a parenthesized
                // suffix after `=` / an extglob-capable prefix.
                let continues = matches!(
                    self.peek_char(),
                    Some(next)
                        if Self::is_word_char(next)
                            || next == '$'
                            || matches!(next, '\'' | '"')
                            || next == '{'
                            || (next == '\\' && self.second_char() == Some('\n'))
                            || (next == '('
                                && (chunk.ends_with('=')
                                    || Self::word_can_take_parenthesized_suffix(chunk)))
                );

                if !continues {
                    // Common case: a self-contained plain word, borrowed
                    // straight from the source.
                    let end = self.current_position();
                    return Some(LexedToken::borrowed_word(
                        TokenKind::Word,
                        &self.input[start.offset..self.offset],
                        Some(Span::from_positions(start, end)),
                    ));
                }

                // A `(` suffix (array assignment value or glob qualifier)
                // needs the full complex-word machinery from `start`.
                if self.peek_char() == Some('(')
                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
                {
                    return self.read_complex_word(start);
                }

                // Otherwise the plain run is the first segment; append the
                // quoted/expanded continuation segments to it.
                let end = self.current_position();
                return self.finish_segmented_word(LexedWord::borrowed(
                    LexedWordSegmentKind::Plain,
                    &self.input[start.offset..self.offset],
                    Some(Span::from_positions(start, end)),
                ));
            }
        }

        // Reinjected input, or a word that starts with something other than
        // a plain word char.
        self.read_complex_word(start)
    }
1549
1550 fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1551 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1552 return Some(LexedToken::error(kind));
1553 }
1554
1555 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1556 }
1557
1558 fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1559 if self.peek_char() == Some('$') {
1560 match self.second_char() {
1561 Some('\'') => return self.read_dollar_single_quoted_string(),
1562 Some('"') => return self.read_dollar_double_quoted_string(),
1563 _ => {}
1564 }
1565 }
1566
1567 let segment = match self.read_unquoted_segment(start) {
1568 Ok(segment) => segment,
1569 Err(kind) => return Some(LexedToken::error(kind)),
1570 };
1571
1572 if segment.as_str().is_empty() {
1573 return None;
1574 }
1575
1576 self.finish_segmented_word(LexedWord::from_segment(segment))
1577 }
1578
    /// Scans one unquoted word segment starting at `start`, stopping at a
    /// quote opener or any character that is not word material.
    ///
    /// Capture strategy: `word` is `None` while lexing straight from source
    /// (the segment can then be borrowed by span at the end) and
    /// `Some(String)` when reading reinjected text. Mid-scan constructs
    /// whose cooked text differs from the raw source (backticks, escapes)
    /// call `ensure_capture_from_source` to switch to owned capture,
    /// copying everything scanned so far.
    ///
    /// A `\x00` byte is pushed before each escaped character — presumably a
    /// marker consumed by a later cooking pass (TODO confirm).
    ///
    /// # Errors
    /// `CommandSubstitution` for an unterminated `$(…)` / `$((…))` / `$[…]`,
    /// `BacktickSubstitution` for an unterminated `` ` ``.
    fn read_unquoted_segment(
        &mut self,
        start: Position,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        while let Some(ch) = self.peek_char() {
            if ch == '"' || ch == '\'' {
                // Quote openers end the unquoted segment; the continuation
                // loop picks them up as their own segments.
                break;
            } else if ch == '$' {
                // `$'` / `$"` only end the segment if something was already
                // collected; at the very start they belong to this word and
                // are handled by read_complex_word's dispatch instead.
                if matches!(self.second_char(), Some('\'') | Some('"'))
                    && (self.current_position().offset > start.offset
                        || word.as_ref().is_some_and(|word| !word.is_empty()))
                {
                    break;
                }

                self.advance();

                Self::push_capture_char(&mut word, ch);
                if self.peek_char() == Some('[') {
                    // Legacy `$[…]` arithmetic.
                    Self::push_capture_char(&mut word, '[');
                    self.advance();
                    if !self.read_legacy_arithmetic_into(&mut word, start) {
                        return Err(LexerErrorKind::CommandSubstitution);
                    }
                } else if self.peek_char() == Some('(') {
                    if self.second_char() == Some('(') {
                        // `$((…))` arithmetic expansion.
                        if !self.read_arithmetic_expansion_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    } else {
                        // `$(…)` command substitution.
                        Self::push_capture_char(&mut word, '(');
                        self.advance();
                        if !self.read_command_subst_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    }
                } else if self.peek_char() == Some('{') {
                    // `${…}` parameter expansion; failure is tolerated here
                    // (the raw text is kept as-is).
                    Self::push_capture_char(&mut word, '{');
                    self.advance();
                    let _ = self.read_param_expansion_into(&mut word, start);
                } else {
                    // Bare parameter: either a single special/positional
                    // char ($?, $#, $@, $*, $!, $$, $-, $0-$9) or an
                    // alphanumeric/underscore name.
                    if let Some(c) = self.peek_char() {
                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
                            || c.is_ascii_digit()
                        {
                            Self::push_capture_char(&mut word, c);
                            self.advance();
                        } else {
                            while let Some(c) = self.peek_char() {
                                if c.is_ascii_alphanumeric() || c == '_' {
                                    Self::push_capture_char(&mut word, c);
                                    self.advance();
                                } else {
                                    break;
                                }
                            }
                        }
                    }
                }
            } else if ch == '{' {
                if self.looks_like_mid_word_brace_segment() {
                    // Mid-word brace expansion like `a{b,c}d`.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                    self.consume_mid_word_brace_segment(&mut word);
                } else {
                    // A literal `{` inside a word.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else if ch == '`' {
                // Backtick command substitution: capture raw, honoring
                // backslash escapes inside it. The cooked text keeps the
                // backticks but differs from source handling, so force
                // owned capture.
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut closed = false;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    if c == '`' {
                        closed = true;
                        break;
                    }
                    if c == '\\'
                        && let Some(next) = self.peek_char()
                    {
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                    }
                }
                if !closed {
                    return Err(LexerErrorKind::BacktickSubstitution);
                }
            } else if ch == '\\' {
                // Escapes always change the cooked text, so force owned
                // capture first.
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                self.advance();
                if let Some(next) = self.peek_char() {
                    if next == '\n' {
                        // Line continuation: both characters vanish.
                        self.advance();
                    } else {
                        // Keep the escaped char, preceded by a NUL marker.
                        Self::push_capture_char(&mut word, '\x00');
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                        // `\{…}` at the very start of a word that looks like
                        // a brace expansion: swallow the balanced braces.
                        if next == '{'
                            && self.current_word_surface_is_single_char(start, &word, '{')
                            && self.escaped_brace_sequence_looks_like_brace_expansion()
                        {
                            let mut depth = 1;
                            while let Some(c) = self.peek_char() {
                                Self::push_capture_char(&mut word, c);
                                self.advance();
                                match c {
                                    '{' => depth += 1,
                                    '}' => {
                                        depth -= 1;
                                        if depth == 0 {
                                            break;
                                        }
                                    }
                                    _ => {}
                                }
                            }
                        }
                    }
                } else {
                    // Trailing backslash at EOF is kept literally.
                    Self::push_capture_char(&mut word, '\\');
                }
            } else if ch == '('
                && self.current_word_surface_ends_with_char(start, &word, '=')
                && self.looks_like_assoc_assign()
            {
                // `name=(…)` array/assoc assignment value: consume the
                // balanced parens, skipping over quoted runs and escapes.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '"' => {
                            // Double-quoted run: parens inside don't count.
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '"' {
                                    break;
                                }
                                if qc == '\\'
                                    && let Some(esc) = self.peek_char()
                                {
                                    Self::push_capture_char(&mut word, esc);
                                    self.advance();
                                }
                            }
                        }
                        '\'' => {
                            // Single-quoted run: no escapes inside.
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '\'' {
                                    break;
                                }
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
            {
                // Extended glob like `@(…)` / `!(…)`: consume balanced
                // parens, honoring backslash escapes.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if Self::is_plain_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Same byte-wise fast path as read_word: trust the ASCII
                    // run only when the terminator is ASCII too.
                    let ascii_len = self.source_ascii_plain_word_len();
                    let chunk = if ascii_len > 0
                        && self
                            .cursor
                            .rest()
                            .as_bytes()
                            .get(ascii_len)
                            .is_none_or(|byte| byte.is_ascii())
                    {
                        self.consume_source_bytes(ascii_len);
                        &self.input[self.offset - ascii_len..self.offset]
                    } else {
                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                        self.advance_scanned_source_bytes(chunk.len());
                        chunk
                    };
                    Self::push_capture_str(&mut word, chunk);
                } else {
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else {
                // Operator / whitespace / anything else ends the segment.
                break;
            }
        }

        if let Some(word) = word {
            // Owned capture (reinjected input or cooked escapes).
            let span = Some(Span::from_positions(start, self.current_position()));
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::Plain,
                word,
                span,
                span,
            ))
        } else {
            // Pure source text: borrow the scanned range.
            let end = self.current_position();
            Ok(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ))
        }
    }
1849
1850 fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1851 let segment = match self.read_single_quoted_segment() {
1852 Ok(segment) => segment,
1853 Err(kind) => return Some(LexedToken::error(kind)),
1854 };
1855 let mut word = LexedWord::from_segment(segment);
1856 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1857 return Some(LexedToken::error(kind));
1858 }
1859
1860 Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1861 }
1862
    /// Lexes one `'…'` segment, starting at the opening quote.
    ///
    /// Fast path: when reading straight from source and zsh's `RC_QUOTES`
    /// option is off, the closing quote is located with `memchr` and the
    /// contents are borrowed. Otherwise characters are copied one by one,
    /// and with `RC_QUOTES` a doubled `''` inside the string becomes a
    /// literal `'`.
    ///
    /// # Errors
    /// `LexerErrorKind::SingleQuote` when the quote is never closed.
    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(1); // opening quote
        let content_start = self.current_position();
        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
        let mut content_end = content_start;
        let mut content = String::with_capacity(16);
        let mut closed = false;

        if can_borrow {
            // Fast path: jump directly to the closing quote.
            let rest = self.cursor.rest();
            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
                self.consume_source_bytes(quote_index);
                content_end = self.current_position();
                self.consume_ascii_chars(1); // closing quote
                closed = true;
            } else {
                // No closing quote anywhere: consume to EOF and fail below.
                self.consume_source_bytes(rest.len());
            }
        }

        // Slow path (and the no-op exit for a fast path that closed).
        while let Some(ch) = self.peek_char() {
            if closed {
                break;
            }
            if ch == '\'' {
                // RC_QUOTES: `''` inside the string is a literal quote.
                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
                    if !can_borrow {
                        content.push('\'');
                    }
                    self.advance();
                    self.advance();
                    continue;
                }
                content_end = self.current_position();
                self.consume_ascii_chars(1); // closing quote
                closed = true;
                break;
            }
            if !can_borrow {
                content.push(ch);
            }
            self.advance();
        }

        if !closed {
            return Err(LexerErrorKind::SingleQuote);
        }

        // Wrapper span includes the quotes; content span excludes them.
        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if can_borrow {
            Ok(LexedWordSegment::borrowed_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                content,
                content_span,
                wrapper_span,
            ))
        }
    }
1933
1934 fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1935 let segment = match self.read_dollar_single_quoted_segment() {
1936 Ok(segment) => segment,
1937 Err(kind) => return Some(LexedToken::error(kind)),
1938 };
1939 let mut word = LexedWord::from_segment(segment);
1940 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1941 return Some(LexedToken::error(kind));
1942 }
1943
1944 let kind = if word.single_segment().is_some() {
1945 TokenKind::LiteralWord
1946 } else {
1947 TokenKind::Word
1948 };
1949
1950 Some(LexedToken::with_word_payload(kind, word))
1951 }
1952
1953 fn read_dollar_single_quoted_segment(
1954 &mut self,
1955 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1956 debug_assert_eq!(self.peek_char(), Some('$'));
1957 debug_assert_eq!(self.second_char(), Some('\''));
1958
1959 let wrapper_start = self.current_position();
1960 self.consume_ascii_chars(2); let content_start = self.current_position();
1962 let mut out = String::with_capacity(16);
1963
1964 while let Some(ch) = self.peek_char() {
1965 if ch == '\'' {
1966 let content_end = self.current_position();
1967 self.advance();
1968 let wrapper_span =
1969 Some(Span::from_positions(wrapper_start, self.current_position()));
1970 let content_span = Some(Span::from_positions(content_start, content_end));
1971 return Ok(LexedWordSegment::owned_with_spans(
1972 LexedWordSegmentKind::DollarSingleQuoted,
1973 out,
1974 content_span,
1975 wrapper_span,
1976 ));
1977 }
1978
1979 if ch == '\\' {
1980 self.advance();
1981 if let Some(esc) = self.peek_char() {
1982 self.advance();
1983 match esc {
1984 'n' => out.push('\n'),
1985 't' => out.push('\t'),
1986 'r' => out.push('\r'),
1987 'a' => out.push('\x07'),
1988 'b' => out.push('\x08'),
1989 'f' => out.push('\x0C'),
1990 'v' => out.push('\x0B'),
1991 'e' | 'E' => out.push('\x1B'),
1992 '\\' => out.push('\\'),
1993 '\'' => out.push('\''),
1994 '"' => out.push('"'),
1995 '?' => out.push('?'),
1996 'c' => {
1997 if let Some(control) = self.peek_char() {
1998 self.advance();
1999 out.push(((control as u32 & 0x1F) as u8) as char);
2000 } else {
2001 out.push('\\');
2002 out.push('c');
2003 }
2004 }
2005 'x' => {
2006 let mut hex = String::new();
2007 for _ in 0..2 {
2008 if let Some(h) = self.peek_char() {
2009 if h.is_ascii_hexdigit() {
2010 hex.push(h);
2011 self.advance();
2012 } else {
2013 break;
2014 }
2015 }
2016 }
2017 if let Ok(val) = u8::from_str_radix(&hex, 16) {
2018 out.push(val as char);
2019 }
2020 }
2021 'u' => {
2022 let mut hex = String::new();
2023 for _ in 0..4 {
2024 if let Some(h) = self.peek_char() {
2025 if h.is_ascii_hexdigit() {
2026 hex.push(h);
2027 self.advance();
2028 } else {
2029 break;
2030 }
2031 }
2032 }
2033 if let Ok(val) = u32::from_str_radix(&hex, 16)
2034 && let Some(c) = char::from_u32(val)
2035 {
2036 out.push(c);
2037 }
2038 }
2039 'U' => {
2040 let mut hex = String::new();
2041 for _ in 0..8 {
2042 if let Some(h) = self.peek_char() {
2043 if h.is_ascii_hexdigit() {
2044 hex.push(h);
2045 self.advance();
2046 } else {
2047 break;
2048 }
2049 }
2050 }
2051 if let Ok(val) = u32::from_str_radix(&hex, 16)
2052 && let Some(c) = char::from_u32(val)
2053 {
2054 out.push(c);
2055 }
2056 }
2057 '0'..='7' => {
2058 let mut oct = String::new();
2059 oct.push(esc);
2060 for _ in 0..2 {
2061 if let Some(o) = self.peek_char() {
2062 if o.is_ascii_digit() && o < '8' {
2063 oct.push(o);
2064 self.advance();
2065 } else {
2066 break;
2067 }
2068 }
2069 }
2070 if let Ok(val) = u8::from_str_radix(&oct, 8) {
2071 out.push(val as char);
2072 }
2073 }
2074 _ => {
2075 out.push('\\');
2076 out.push(esc);
2077 }
2078 }
2079 } else {
2080 out.push('\\');
2081 }
2082 continue;
2083 }
2084
2085 out.push(ch);
2086 self.advance();
2087 }
2088
2089 Err(LexerErrorKind::SingleQuote)
2090 }
2091
    /// Reads a run of plain word characters as a continuation segment, or
    /// returns `None` when the next character cannot extend the word.
    ///
    /// Mirrors `read_word`'s fast path: straight source text is consumed as
    /// a byte run and borrowed; reinjected text is copied char by char into
    /// an owned segment.
    fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            let ascii_len = self.source_ascii_plain_word_len();
            // Trust the byte-wise ASCII scan only when the byte ending the
            // run is itself ASCII; otherwise rescan char-wise.
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if chunk.is_empty() {
                return None;
            }

            let end = self.current_position();
            return Some(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ));
        }

        // Reinjected input: step through chars and build an owned segment.
        let ch = self.peek_char()?;
        if !Self::is_plain_word_char(ch) {
            return None;
        }

        let mut text = String::with_capacity(16);
        while let Some(ch) = self.peek_char() {
            if !Self::is_plain_word_char(ch) {
                break;
            }
            text.push(ch);
            self.advance();
        }

        Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
    }
2140
2141 fn append_segmented_continuation(
2144 &mut self,
2145 word: &mut LexedWord<'a>,
2146 ) -> Result<(), LexerErrorKind> {
2147 loop {
2148 match self.peek_char() {
2149 Some('\\') if self.second_char() == Some('\n') => {
2150 self.advance();
2151 self.advance();
2152 continue;
2153 }
2154 Some('\'') => {
2155 word.push_segment(self.read_single_quoted_segment()?);
2156 }
2157 Some('"') => {
2158 word.push_segment(self.read_double_quoted_segment()?);
2159 }
2160 Some('$') if self.second_char() == Some('\'') => {
2161 word.push_segment(self.read_dollar_single_quoted_segment()?);
2162 }
2163 Some('$') if self.second_char() == Some('"') => {
2164 word.push_segment(self.read_dollar_double_quoted_segment()?);
2165 }
2166 Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2167 let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2168 unreachable!("peeked '(' should produce a suffix segment");
2169 };
2170 word.push_segment(segment);
2171 }
2172 _ => {
2173 if let Some(segment) = self.read_plain_continuation_segment() {
2174 word.push_segment(segment);
2175 continue;
2176 }
2177
2178 let start = self.current_position();
2179 let plain = self.read_unquoted_segment(start)?;
2180 if plain.as_str().is_empty() {
2181 break;
2182 }
2183 word.push_segment(plain);
2184 }
2185 }
2186 }
2187
2188 Ok(())
2189 }
2190
2191 fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2192 debug_assert_eq!(self.peek_char(), Some('('));
2193
2194 let start = self.current_position();
2195 let mut depth = 0usize;
2196 let mut escaped = false;
2197 let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2198
2199 while let Some(ch) = self.peek_char() {
2200 if let Some(text) = text.as_mut() {
2201 text.push(ch);
2202 }
2203 self.advance();
2204
2205 if escaped {
2206 escaped = false;
2207 continue;
2208 }
2209
2210 match ch {
2211 '\\' => escaped = true,
2212 '(' => depth += 1,
2213 ')' => {
2214 depth = depth.saturating_sub(1);
2215 if depth == 0 {
2216 break;
2217 }
2218 }
2219 _ => {}
2220 }
2221 }
2222
2223 let end = self.current_position();
2224 let span = Some(Span::from_positions(start, end));
2225 if let Some(text) = text {
2226 Some(LexedWordSegment::owned_with_spans(
2227 LexedWordSegmentKind::Plain,
2228 text,
2229 span,
2230 span,
2231 ))
2232 } else {
2233 Some(LexedWordSegment::borrowed_with_spans(
2234 LexedWordSegmentKind::Plain,
2235 &self.input[start.offset..end.offset],
2236 span,
2237 span,
2238 ))
2239 }
2240 }
2241
    /// Lexes a `"…"` double-quoted string token (the plain, non-`$"` form).
    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(false)
    }
2245
    /// Lexes a `$"…"` (locale-translated) double-quoted string token.
    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(true)
    }
2249
2250 fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2251 let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2252 Ok(segment) => segment,
2253 Err(kind) => return Some(LexedToken::error(kind)),
2254 };
2255 let mut word = LexedWord::from_segment(segment);
2256 if let Err(kind) = self.append_segmented_continuation(&mut word) {
2257 return Some(LexedToken::error(kind));
2258 }
2259
2260 let kind = if word.single_segment().is_some() {
2261 TokenKind::QuotedWord
2262 } else {
2263 TokenKind::Word
2264 };
2265
2266 Some(LexedToken::with_word_payload(kind, word))
2267 }
2268
    /// Lexes one `"…"` segment (used by the continuation loop).
    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(false)
    }
2272
    /// Lexes one `$"…"` segment (used by the continuation loop).
    fn read_dollar_double_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(true)
    }
2278
    /// Lexes one double-quoted segment, for both `"…"` (`dollar == false`)
    /// and `$"…"` (`dollar == true`).
    ///
    /// Two flags drive the scan:
    /// - `simple`: still on the fast path; bytes are skipped in bulk until
    ///   a special character (`"`, `\`, `$`, `` ` ``) appears. Cleared the
    ///   first time one does.
    /// - `borrowable`: the cooked content is byte-identical to the source,
    ///   so the segment can be borrowed by span. Cleared whenever an escape
    ///   is rewritten, a backtick run is captured, or a line continuation
    ///   is dropped.
    ///
    /// `\x00` bytes are inserted before certain escaped characters
    /// (`\$`, `\\`, `` \` ``) — presumably markers for a later cooking
    /// pass (TODO confirm).
    ///
    /// # Errors
    /// `LexerErrorKind::DoubleQuote` when the closing quote is never found.
    fn read_double_quoted_segment_with_dollar(
        &mut self,
        dollar: bool,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        if dollar {
            debug_assert_eq!(self.peek_char(), Some('$'));
            debug_assert_eq!(self.second_char(), Some('"'));
        } else {
            debug_assert_eq!(self.peek_char(), Some('"'));
        }

        let wrapper_start = self.current_position();
        if dollar {
            self.consume_ascii_chars(2); // `$"`
        } else {
            self.consume_ascii_chars(1); // `"`
        }
        let content_start = self.current_position();
        let mut content_end = content_start;
        let mut simple = self.reinject_buf.is_empty();
        let mut borrowable = self.reinject_buf.is_empty();
        // Owned capture only for reinjected input; source input captures
        // lazily via ensure_capture_from_source when needed.
        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        let mut closed = false;

        while let Some(ch) = self.peek_char() {
            if simple {
                if self.reinject_buf.is_empty() {
                    // Bulk-skip to the next special byte in the source.
                    let rest = self.cursor.rest();
                    match Self::find_double_quote_special(rest) {
                        Some(index) if index > 0 => {
                            self.consume_source_bytes(index);
                            continue;
                        }
                        None => {
                            // No special byte at all: the quote is unclosed.
                            self.consume_source_bytes(rest.len());
                            return Err(LexerErrorKind::DoubleQuote);
                        }
                        _ => {}
                    }
                }

                match ch {
                    '"' => {
                        content_end = self.current_position();
                        self.consume_ascii_chars(1); // closing quote
                        closed = true;
                        break;
                    }
                    '\\' | '$' | '`' => {
                        // Leave the fast path; backticks additionally force
                        // owned capture since their raw run is rewritten.
                        simple = false;
                        if ch == '`' {
                            borrowable = false;
                            let capture_end = self.current_position();
                            self.ensure_capture_from_source(
                                &mut content,
                                content_start,
                                capture_end,
                            );
                        }
                    }
                    _ => {
                        self.advance();
                    }
                }
                if simple {
                    continue;
                }
            }

            match ch {
                '"' => {
                    // Closing quote; only move content_end if the segment is
                    // still borrowable (otherwise it tracks cooked content).
                    if borrowable {
                        content_end = self.current_position();
                    }
                    self.consume_ascii_chars(1);
                    closed = true;
                    break;
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        match next {
                            '\n' => {
                                // Line continuation: both chars vanish from
                                // the cooked content.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                self.advance();
                            }
                            '$' => {
                                // Escaped `$`: drop the backslash, keep a NUL
                                // marker plus the `$`.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                Self::push_capture_char(&mut content, '\x00');
                                Self::push_capture_char(&mut content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // The backslash is dropped (with a NUL marker
                                // for `\\` and `` \` ``), so cooked text
                                // differs from source.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                if next == '\\' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                if next == '`' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                            _ => {
                                // Any other escape passes through verbatim.
                                Self::push_capture_char(&mut content, '\\');
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                        }
                    }
                }
                '$' => {
                    // Expansion: capture its raw surface text. The raw text
                    // matches the source, so `borrowable` stays unchanged
                    // except where the param reader says otherwise.
                    Self::push_capture_char(&mut content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        // NOTE(review): failures from these readers are
                        // ignored here; an expansion running to EOF is still
                        // reported as an unclosed quote below — confirm.
                        if self.second_char() == Some('(') {
                            self.read_arithmetic_expansion_into(&mut content);
                        } else {
                            Self::push_capture_char(&mut content, '(');
                            self.advance();
                            self.read_command_subst_into(&mut content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(&mut content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
                    }
                    content_end = self.current_position();
                }
                '`' => {
                    // Backtick substitution: capture raw, honoring backslash
                    // escapes; always forces owned capture.
                    borrowable = false;
                    let capture_end = self.current_position();
                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
                    Self::push_capture_char(&mut content, '`');
                    self.advance();
                    while let Some(c) = self.peek_char() {
                        Self::push_capture_char(&mut content, c);
                        self.advance();
                        if c == '`' {
                            break;
                        }
                        if c == '\\'
                            && let Some(next) = self.peek_char()
                        {
                            Self::push_capture_char(&mut content, next);
                            self.advance();
                        }
                    }
                    content_end = self.current_position();
                }
                _ => {
                    Self::push_capture_char(&mut content, ch);
                    self.advance();
                    content_end = self.current_position();
                }
            }
        }

        if !closed {
            return Err(LexerErrorKind::DoubleQuote);
        }

        // Wrapper span includes the quotes (and `$`); content span doesn't.
        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if borrowable {
            Ok(LexedWordSegment::borrowed_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                content.unwrap_or_default(),
                content_span,
                wrapper_span,
            ))
        }
    }
2486
    /// Captures a `$((...))` arithmetic expansion into `content`, starting at
    /// the first of the two opening parentheses (asserted below). Both opening
    /// parens and both closing parens are included in the capture.
    ///
    /// Escapes, single quotes, double quotes, and backticks are copied
    /// verbatim as opaque regions so parentheses inside them do not disturb
    /// the depth count.
    ///
    /// Returns `true` once the paren depth returns to zero, `false` if the
    /// input ends first.
    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
        debug_assert_eq!(self.peek_char(), Some('('));
        debug_assert_eq!(self.second_char(), Some('('));

        Self::push_capture_char(content, '(');
        self.advance();
        Self::push_capture_char(content, '(');
        self.advance();

        // The two opening parens consumed above account for the initial depth.
        let mut depth = 2;
        while let Some(c) = self.peek_char() {
            match c {
                // Copy the escape and the escaped character without inspecting them.
                '\\' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                // Single quotes: opaque until the closing quote (no escapes inside).
                '\'' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                // Double quotes: opaque until an unescaped closing quote.
                '"' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                // Backticks: opaque until an unescaped closing backtick.
                '`' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '(' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth += 1;
                }
                ')' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth -= 1;
                    if depth == 0 {
                        return true;
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        false
    }
2576
    /// Captures the body of a legacy `$[...]` arithmetic expansion into
    /// `content`, balancing square brackets. The opening `[` has already been
    /// consumed by the caller, hence `bracket_depth` starts at 1.
    ///
    /// Quotes, backticks, and escapes are copied opaquely, and nested
    /// `$((..))`, `$(..)`, `${..}`, and `$[..]` constructs are delegated to
    /// their dedicated readers so brackets inside them are not miscounted.
    ///
    /// Returns `true` when the matching `]` was found, `false` on end of
    /// input or when a nested reader fails.
    fn read_legacy_arithmetic_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut bracket_depth = 1;

        while let Some(c) = self.peek_char() {
            match c {
                // Copy the escape and the escaped character verbatim.
                '\\' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                // Single quotes: opaque until the closing quote.
                '\'' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                // Double quotes: opaque until an unescaped closing quote.
                '"' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                // Backticks: opaque until an unescaped closing backtick.
                '`' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '[' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth += 1;
                }
                ']' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth -= 1;
                    if bracket_depth == 0 {
                        return true;
                    }
                }
                // `$` may introduce a nested expansion; dispatch on the next char.
                '$' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                return false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            if !self.read_command_subst_into(content) {
                                return false;
                            }
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        if !self.read_param_expansion_into(content, segment_start) {
                            return false;
                        }
                    } else if self.peek_char() == Some('[') {
                        Self::push_capture_char(content, '[');
                        self.advance();
                        if !self.read_legacy_arithmetic_into(content, segment_start) {
                            return false;
                        }
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        false
    }
2692
    /// Captures a `$(...)` command substitution body (including the closing
    /// parenthesis) into `content`, starting with a fresh nesting-depth budget.
    ///
    /// Returns `true` when the substitution was closed before end of input.
    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
        self.read_command_subst_into_depth(content, 0)
    }
2699
2700 fn flush_command_subst_keyword(
2701 current_word: &mut String,
2702 pending_case_headers: &mut usize,
2703 case_clause_depths: &mut SmallVec<[usize; 4]>,
2704 depth: usize,
2705 word_started_at_command_start: &mut bool,
2706 ) {
2707 if current_word.is_empty() {
2708 *word_started_at_command_start = false;
2709 return;
2710 }
2711
2712 match current_word.as_str() {
2713 "case" if *word_started_at_command_start => *pending_case_headers += 1,
2714 "in" if *pending_case_headers > 0 => {
2715 *pending_case_headers -= 1;
2716 case_clause_depths.push(depth);
2717 }
2718 "esac" if *word_started_at_command_start => {
2719 case_clause_depths.pop();
2720 }
2721 _ => {}
2722 }
2723
2724 current_word.clear();
2725 *word_started_at_command_start = false;
2726 }
2727
    /// Scans a heredoc delimiter word after `<<`/`<<-` inside a command
    /// substitution, echoing every raw character into `content` while also
    /// building the "cooked" delimiter (quotes removed, escapes resolved)
    /// that heredoc body lines are later matched against.
    ///
    /// Returns `Some(cooked)` if at least one delimiter character was read,
    /// `None` when the delimiter was empty.
    fn read_command_subst_heredoc_delimiter_into(
        &mut self,
        content: &mut Option<String>,
    ) -> Option<String> {
        // Consume (but still capture) horizontal whitespace before the word.
        while let Some(ch) = self.peek_char() {
            if !matches!(ch, ' ' | '\t') {
                break;
            }
            Self::push_capture_char(content, ch);
            self.advance();
        }

        let mut cooked = String::new();
        let mut in_single = false;
        let mut in_double = false;
        let mut escaped = false;
        let mut saw_any = false;

        while let Some(ch) = self.peek_char() {
            // Stop at a terminator character for the current quoting state.
            if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
                break;
            }

            saw_any = true;
            Self::push_capture_char(content, ch);
            self.advance();

            if escaped {
                // An escaped character is literal in the cooked delimiter.
                cooked.push(ch);
                escaped = false;
                continue;
            }

            match ch {
                // Quote and escape characters shape the cooked form but are
                // not themselves part of it.
                '\\' if !in_single => escaped = true,
                '\'' if !in_double => in_single = !in_single,
                '"' if !in_single => in_double = !in_double,
                _ => cooked.push(ch),
            }
        }

        saw_any.then_some(cooked)
    }
2771
2772 fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2773 Self::push_capture_char(content, '`');
2774 self.advance();
2775 while let Some(ch) = self.peek_char() {
2776 Self::push_capture_char(content, ch);
2777 self.advance();
2778 if ch == '\\' {
2779 if let Some(esc) = self.peek_char() {
2780 Self::push_capture_char(content, esc);
2781 self.advance();
2782 }
2783 continue;
2784 }
2785 if ch == '`' {
2786 break;
2787 }
2788 }
2789 }
2790
2791 fn read_command_subst_pending_heredoc_into(
2792 &mut self,
2793 content: &mut Option<String>,
2794 delimiter: &str,
2795 strip_tabs: bool,
2796 ) -> bool {
2797 loop {
2798 let mut line = String::new();
2799 let mut saw_newline = false;
2800
2801 while let Some(ch) = self.peek_char() {
2802 self.advance();
2803 if ch == '\n' {
2804 saw_newline = true;
2805 break;
2806 }
2807 line.push(ch);
2808 }
2809
2810 Self::push_capture_str(content, &line);
2811 if saw_newline {
2812 Self::push_capture_char(content, '\n');
2813 }
2814
2815 if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2816 return true;
2817 }
2818 }
2819 }
2820
    /// Depth-limited worker behind `read_command_subst_into`: captures the
    /// body of a `$(...)` command substitution up to and including the `)`
    /// that balances the already-consumed `$(`.
    ///
    /// Beyond paren counting, enough shell structure is tracked to avoid
    /// miscounting parens:
    /// - comments, quoted strings, backticks, `$'...'`, and escapes are
    ///   copied opaquely;
    /// - heredocs introduced by `<<`/`<<-` have their bodies consumed after
    ///   the next newline, so a `)` inside a heredoc body is ignored;
    /// - `case` clauses are tracked (via `flush_command_subst_keyword`) so the
    ///   unbalanced `)` ending a case pattern is not taken as the closer.
    ///
    /// Returns `true` when the closing `)` was found, `false` on end of input.
    fn read_command_subst_into_depth(
        &mut self,
        content: &mut Option<String>,
        subst_depth: usize,
    ) -> bool {
        // Past the nesting budget: fall back to bare paren matching with no
        // quote/heredoc awareness, bounding work on deeply nested input.
        // NOTE(review): this fallback consumes body characters without
        // capturing them — only the final `)` is pushed; confirm that
        // truncating the capture at the depth limit is intended.
        if subst_depth >= self.max_subst_depth {
            let mut depth = 1;
            while let Some(c) = self.peek_char() {
                self.advance();
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            Self::push_capture_char(content, ')');
                            return true;
                        }
                    }
                    _ => {}
                }
            }
            return false;
        }

        let mut depth = 1;
        // Heredocs announced on the current line: (cooked delimiter, `<<-`?).
        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
        // `case` bookkeeping; see `flush_command_subst_keyword`.
        let mut pending_case_headers = 0usize;
        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
        // Current unquoted word, used only for keyword detection.
        let mut current_word = String::with_capacity(16);
        let mut at_command_start = true;
        let mut expecting_redirection_target = false;
        let mut current_word_started_at_command_start = false;
        while let Some(c) = self.peek_char() {
            match c {
                // Comment: copy through end of line, then consume any heredoc
                // bodies that were announced earlier on the line.
                '#' if !self.should_treat_hash_as_word_char() => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '#');
                    self.advance();
                    while let Some(comment_ch) = self.peek_char() {
                        Self::push_capture_char(content, comment_ch);
                        self.advance();
                        if comment_ch == '\n' {
                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                                if !self.read_command_subst_pending_heredoc_into(
                                    content, &delimiter, strip_tabs,
                                ) {
                                    return false;
                                }
                            }
                            at_command_start = true;
                            expecting_redirection_target = false;
                            break;
                        }
                    }
                }
                '(' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    depth += 1;
                    Self::push_capture_char(content, c);
                    self.advance();
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                ')' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    // A `)` at the depth of an open case clause ends a case
                    // pattern, not the substitution: don't change `depth`.
                    if case_clause_depths
                        .last()
                        .is_some_and(|case_depth| *case_depth == depth)
                    {
                        Self::push_capture_char(content, ')');
                        self.advance();
                        at_command_start = true;
                        expecting_redirection_target = false;
                        continue;
                    }
                    depth -= 1;
                    self.advance();
                    if depth == 0 {
                        Self::push_capture_char(content, ')');
                        return true;
                    }
                    Self::push_capture_char(content, c);
                    at_command_start = false;
                    expecting_redirection_target = false;
                }
                // Double-quoted string: copy opaquely, but still recurse into
                // `$(...)` / `$((...))` so their parens are balanced properly.
                '"' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '"');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        match qc {
                            '"' => {
                                Self::push_capture_char(content, '"');
                                self.advance();
                                break;
                            }
                            '\\' => {
                                Self::push_capture_char(content, '\\');
                                self.advance();
                                if let Some(esc) = self.peek_char() {
                                    Self::push_capture_char(content, esc);
                                    self.advance();
                                }
                            }
                            '$' => {
                                Self::push_capture_char(content, '$');
                                self.advance();
                                if self.peek_char() == Some('(') {
                                    if self.second_char() == Some('(') {
                                        if !self.read_arithmetic_expansion_into(content) {
                                            return false;
                                        }
                                    } else {
                                        Self::push_capture_char(content, '(');
                                        self.advance();
                                        if !self
                                            .read_command_subst_into_depth(content, subst_depth + 1)
                                        {
                                            return false;
                                        }
                                    }
                                }
                            }
                            _ => {
                                Self::push_capture_char(content, qc);
                                self.advance();
                            }
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Single-quoted string: fully opaque.
                '\'' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backtick substitution: delegate to the segment reader.
                '`' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    self.read_command_subst_backtick_segment_into(content);
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // ANSI-C quoting `$'...'`: opaque, honoring backslash escapes.
                '$' if self.second_char() == Some('\'') => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '$');
                    self.advance();
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\\' {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            continue;
                        }
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Bare escape: copy the pair verbatim.
                '\\' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\\');
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        Self::push_capture_char(content, esc);
                        self.advance();
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `<<` heredoc (or `<<<` herestring) operator.
                '<' if self.second_char() == Some('<') => {
                    // A purely numeric word directly before the operator is a
                    // file descriptor, not a command word.
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }

                    Self::push_capture_char(content, '<');
                    self.advance();
                    Self::push_capture_char(content, '<');
                    self.advance();

                    // `<<<` is a herestring: no heredoc body follows.
                    if self.peek_char() == Some('<') {
                        Self::push_capture_char(content, '<');
                        self.advance();
                        expecting_redirection_target = true;
                        continue;
                    }

                    // `<<-` strips leading tabs from body/delimiter lines.
                    let strip_tabs = if self.peek_char() == Some('-') {
                        Self::push_capture_char(content, '-');
                        self.advance();
                        true
                    } else {
                        false
                    };

                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
                    {
                        pending_heredocs.push((delimiter, strip_tabs));
                        expecting_redirection_target = false;
                    } else {
                        expecting_redirection_target = true;
                    }
                }
                // Single-character redirection operators.
                '>' | '<' => {
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                    expecting_redirection_target = true;
                }
                // Newline: any announced heredoc bodies start here.
                '\n' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    Self::push_capture_char(content, '\n');
                    self.advance();
                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                        if !self.read_command_subst_pending_heredoc_into(
                            content, &delimiter, strip_tabs,
                        ) {
                            return false;
                        }
                    }
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                _ => {
                    if c.is_ascii_alphanumeric() || c == '_' {
                        // Accumulate a keyword candidate; remember whether it
                        // began in command position.
                        if current_word.is_empty()
                            && !expecting_redirection_target
                            && at_command_start
                        {
                            current_word_started_at_command_start = true;
                            at_command_start = false;
                        }
                        current_word.push(c);
                    } else {
                        let had_word = !current_word.is_empty();
                        Self::flush_command_subst_keyword(
                            &mut current_word,
                            &mut pending_case_headers,
                            &mut case_clause_depths,
                            depth,
                            &mut current_word_started_at_command_start,
                        );
                        if had_word && expecting_redirection_target {
                            expecting_redirection_target = false;
                        }
                        match c {
                            ' ' | '\t' => {}
                            // Command separators reset to command position.
                            ';' | '|' | '&' => {
                                at_command_start = true;
                                expecting_redirection_target = false;
                            }
                            _ => {
                                if !expecting_redirection_target {
                                    at_command_start = false;
                                }
                            }
                        }
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        false
    }
3221
    /// Captures the body of a `${...}` parameter expansion — the `${` has
    /// already been consumed and captured by the caller — including the
    /// closing `}`.
    ///
    /// Tracks single/double quoting, nested `${..}` / `$(..)` / `$((..))`
    /// constructs, and plain literal braces. Some escape sequences are
    /// rewritten in the capture (e.g. `\"` → `"`); whenever that happens the
    /// segment can no longer be borrowed directly from the source. The return
    /// value reports whether the capture is still borrowable (it does not
    /// distinguish closed from end-of-input).
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut borrowable = true;
        // Nesting depth of `${`; `literal_brace_depth` counts bare `{` seen
        // inside the expansion that must not consume the closer.
        let mut depth = 1;
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Expansion depth at which the current double quote was opened.
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            if in_single {
                match c {
                    '\\' => {
                        let escape_start = self.current_position();
                        // `\"` inside single quotes is rewritten to a bare `"`,
                        // which forces an owned capture.
                        if self.second_char() == Some('"') {
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                // A closer counts outside single quotes and, inside double
                // quotes, only for expansions opened within those quotes.
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    // Prefer treating this `}` as literal while an unmatched
                    // literal `{` is open and a later closer still exists.
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                '{' if !in_single && !in_double => {
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    // Single quotes are literal when inside double quotes.
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                // Escaped `$`: captured as a NUL byte followed
                                // by `$` — presumably a sentinel for a later
                                // unescaping pass; confirm with the segment
                                // consumer. Requires an owned capture.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // These escapes collapse to the escaped char.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // `\}` stays escaped but closes one literal brace.
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at end of input.
                        Self::push_capture_char(content, '\\');
                    }
                }
                // Nested expansion: dispatch on the character after `$`.
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3372
    /// Lookahead-only helper for `read_param_expansion_into`: when a `}` at
    /// `target_depth` could be literal, reports whether another `}` closing
    /// the expansion at that depth still appears later in the input.
    ///
    /// Mirrors the reader's quoting rules (single quotes with the `\"` pair,
    /// double quotes with depth tracking, escapes, nested `${`). Gives up at
    /// a newline seen at the target depth, or at end of input.
    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
        let mut chars = self.lookahead_chars().peekable();
        let mut depth = target_depth;
        let mut in_single = false;
        let mut in_double = false;
        let mut double_quote_depth = 0usize;

        while let Some(ch) = chars.next() {
            if in_single {
                match ch {
                    '\'' => in_single = false,
                    // `\"` is consumed as a pair, matching the reader's rewrite.
                    '\\' if chars.peek() == Some(&'"') => {
                        chars.next();
                    }
                    '\\' => {}
                    _ => {}
                }
                continue;
            }

            if in_double {
                match ch {
                    '"' => {
                        in_double = false;
                        double_quote_depth = 0;
                    }
                    '\\' => {
                        chars.next();
                    }
                    '$' if chars.peek() == Some(&'{') => {
                        chars.next();
                        depth += 1;
                    }
                    // Only expansions opened inside these quotes may close here.
                    '}' if depth > double_quote_depth => {
                        depth -= 1;
                    }
                    _ => {}
                }
                continue;
            }

            match ch {
                '\n' if depth == target_depth => return false,
                '\'' => in_single = true,
                '"' => {
                    in_double = true;
                    double_quote_depth = depth;
                }
                '\\' => {
                    chars.next();
                }
                '$' if chars.peek() == Some(&'{') => {
                    chars.next();
                    depth += 1;
                }
                '}' => {
                    if depth == target_depth {
                        return true;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        false
    }
3440
    /// Lookahead-only check: does the `{` at the cursor begin a brace
    /// expansion (e.g. `{a,b}` or `{1..9}`)?
    ///
    /// Requires a matching `}` preceded by a top-level `,` or `..`, with no
    /// top-level whitespace or `;` before it. Quotes, backticks, escapes, and
    /// `$(...)` nesting are tracked so separators inside them don't count.
    /// The scan is capped at `MAX_LOOKAHEAD` characters.
    fn looks_like_brace_expansion(&self) -> bool {
        const MAX_LOOKAHEAD: usize = 10_000;

        let mut chars = self.lookahead_chars();

        if chars.next() != Some('{') {
            return false;
        }

        let mut depth = 1;
        let mut paren_depth = 0usize;
        let mut has_comma = false;
        let mut has_dot_dot = false;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;
        let mut scanned = 0usize;

        for ch in chars {
            scanned += 1;
            if scanned > MAX_LOOKAHEAD {
                return false;
            }

            let brace_surface_active = !in_single && !in_double && !in_backtick;
            let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;

            match ch {
                // NOTE(review): an escaped character still becomes `prev_char`
                // below, so `\.` followed by `.` registers as `..` — confirm
                // this is intended.
                _ if escaped => {
                    escaped = false;
                }
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // `(` only nests after `$` or inside an open paren group.
                '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
                    paren_depth += 1
                }
                ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
                '{' if !in_single && !in_double && !in_backtick => depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    depth -= 1;
                    if depth == 0 {
                        return has_comma || has_dot_dot;
                    }
                }
                ',' if at_top_level => has_comma = true,
                '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
                // A word break before the closer: not a brace expansion.
                ' ' | '\t' | '\n' | ';' if at_top_level => return false,
                _ => {}
            }
            prev_char = Some(ch);
        }

        false
    }
3507
    /// Consumes a `{...}` segment occurring mid-word, appending every
    /// character (including the matching `}`) to the optional capture buffer.
    /// The opening `{` has already been consumed by the caller.
    ///
    /// Same quote/paren tracking as `consume_brace_word_body`, but writing
    /// through `push_capture_char` so capture can be disabled.
    fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
        let mut brace_depth = 1usize;
        let mut paren_depth = 0usize;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;

        while let Some(ch) = self.peek_char() {
            Self::push_capture_char(word, ch);
            self.advance();

            // An escaped character is consumed without affecting any state.
            if escaped {
                escaped = false;
                prev_char = Some(ch);
                continue;
            }

            match ch {
                '\\' if !in_single => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // `(` only nests after `$` or inside an open paren group.
                '(' if !in_single
                    && !in_double
                    && !in_backtick
                    && (paren_depth > 0 || prev_char == Some('$')) =>
                {
                    paren_depth += 1
                }
                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
                    paren_depth -= 1
                }
                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    brace_depth -= 1;
                    if brace_depth == 0 {
                        break;
                    }
                }
                _ => {}
            }

            prev_char = Some(ch);
        }
    }
3555
3556 fn consume_brace_word_body(&mut self, word: &mut String) {
3557 let mut brace_depth = 1usize;
3558 let mut paren_depth = 0usize;
3559 let mut escaped = false;
3560 let mut in_single = false;
3561 let mut in_double = false;
3562 let mut in_backtick = false;
3563 let mut prev_char = None;
3564
3565 while let Some(ch) = self.peek_char() {
3566 word.push(ch);
3567 self.advance();
3568
3569 if escaped {
3570 escaped = false;
3571 prev_char = Some(ch);
3572 continue;
3573 }
3574
3575 match ch {
3576 '\\' if !in_single => escaped = true,
3577 '\'' if !in_double && !in_backtick => in_single = !in_single,
3578 '"' if !in_single && !in_backtick => in_double = !in_double,
3579 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3580 '(' if !in_single
3581 && !in_double
3582 && !in_backtick
3583 && (paren_depth > 0 || prev_char == Some('$')) =>
3584 {
3585 paren_depth += 1
3586 }
3587 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3588 paren_depth -= 1
3589 }
3590 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3591 '}' if !in_single && !in_double && !in_backtick => {
3592 brace_depth -= 1;
3593 if brace_depth == 0 {
3594 break;
3595 }
3596 }
3597 _ => {}
3598 }
3599
3600 prev_char = Some(ch);
3601 }
3602 }
3603
    /// Lookahead-only check: does the `{` at the cursor start a `{...}`
    /// segment whose matching `}` arrives before any unquoted top-level word
    /// delimiter (whitespace, newline, `;`, `|`, `&`, `<`, `>`)?
    ///
    /// The scan is capped at `MAX_LOOKAHEAD` characters.
    fn looks_like_mid_word_brace_segment(&self) -> bool {
        const MAX_LOOKAHEAD: usize = 10_000;

        let mut chars = self.lookahead_chars();
        if chars.next() != Some('{') {
            return false;
        }

        let mut brace_depth = 1;
        let mut paren_depth = 0usize;
        let mut escaped = false;
        let mut in_single = false;
        let mut in_double = false;
        let mut in_backtick = false;
        let mut prev_char = None;
        let mut scanned = 0usize;

        for ch in chars {
            scanned += 1;
            if scanned > MAX_LOOKAHEAD {
                return false;
            }

            // Unquoted, unescaped word delimiter before the closer: reject.
            if !in_single
                && !in_double
                && !in_backtick
                && !escaped
                && brace_depth == 1
                && paren_depth == 0
                && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
            {
                return false;
            }

            if escaped {
                escaped = false;
                prev_char = Some(ch);
                continue;
            }

            match ch {
                '\\' => escaped = true,
                '\'' if !in_double && !in_backtick => in_single = !in_single,
                '"' if !in_single && !in_backtick => in_double = !in_double,
                '`' if !in_single && !in_double => in_backtick = !in_backtick,
                // `(` only nests after `$` or inside an open paren group.
                '(' if !in_single
                    && !in_double
                    && !in_backtick
                    && (paren_depth > 0 || prev_char == Some('$')) =>
                {
                    paren_depth += 1
                }
                ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
                    paren_depth -= 1
                }
                '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
                '}' if !in_single && !in_double && !in_backtick => {
                    brace_depth -= 1;
                    if brace_depth == 0 {
                        return true;
                    }
                }
                _ => {}
            }

            prev_char = Some(ch);
        }

        false
    }
3676
3677 fn is_brace_group_start(&self) -> bool {
3679 let mut chars = self.lookahead_chars();
3680 if chars.next() != Some('{') {
3682 return false;
3683 }
3684 matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3686 }
3687
3688 fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3691 const MAX_LOOKAHEAD: usize = 10_000;
3692
3693 let mut chars = self.lookahead_chars();
3694 let mut depth = 1;
3695 let mut has_comma = false;
3696 let mut has_dot_dot = false;
3697 let mut prev_char = None;
3698 let mut scanned = 0usize;
3699
3700 for ch in chars.by_ref() {
3701 scanned += 1;
3702 if scanned > MAX_LOOKAHEAD {
3703 return false;
3704 }
3705 match ch {
3706 '{' => depth += 1,
3707 '}' => {
3708 depth -= 1;
3709 if depth == 0 {
3710 return has_comma || has_dot_dot;
3711 }
3712 }
3713 ',' if depth == 1 => has_comma = true,
3714 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3715 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3716 _ => {}
3717 }
3718 prev_char = Some(ch);
3719 }
3720
3721 false
3722 }
3723
3724 fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3725 let mut chars = self.lookahead_chars();
3726 if chars.next() != Some('{') {
3727 return false;
3728 }
3729 chars.next() == Some(')')
3730 }
3731
    /// Reads a `{`-initiated word as a single literal `Word` token: the `{`,
    /// the brace body through its matching `}`, and any directly attached
    /// trailing word characters.
    ///
    /// Returns `None` if the cursor is not on `{`.
    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
        let mut word = String::with_capacity(16);

        if let Some('{') = self.peek_char() {
            word.push('{');
            self.advance();
        } else {
            return None;
        }

        self.consume_brace_word_body(&mut word);

        // Append word characters that follow the closing `}`.
        while let Some(ch) = self.peek_char() {
            if Self::is_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Fast path: take a contiguous chunk straight off the cursor.
                    let chunk = self.cursor.eat_while(Self::is_word_char);
                    word.push_str(chunk);
                    self.advance_scanned_source_bytes(chunk.len());
                } else {
                    // Reinjected characters must go through `advance` one at a time.
                    word.push(ch);
                    self.advance();
                }
            } else {
                break;
            }
        }

        Some(LexedToken::owned_word(TokenKind::Word, word))
    }
3762
3763 fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3765 let mut word = String::with_capacity(16);
3766
3767 if let Some('{') = self.peek_char() {
3769 word.push('{');
3770 self.advance();
3771 } else {
3772 return None;
3773 }
3774
3775 self.consume_brace_word_body(&mut word);
3777
3778 while let Some(ch) = self.peek_char() {
3780 if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3781 if ch == '{' {
3782 word.push(ch);
3784 self.advance();
3785 self.consume_brace_word_body(&mut word);
3786 } else {
3787 word.push(ch);
3788 self.advance();
3789 }
3790 } else {
3791 break;
3792 }
3793 }
3794
3795 Some(LexedToken::owned_word(TokenKind::Word, word))
3796 }
3797
3798 fn looks_like_assoc_assign(&self) -> bool {
3802 let mut chars = self.lookahead_chars();
3803 if chars.next() != Some('(') {
3805 return false;
3806 }
3807 for ch in chars {
3809 match ch {
3810 ' ' | '\t' => continue,
3811 '[' => return true,
3812 _ => return false,
3813 }
3814 }
3815 false
3816 }
3817
3818 fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3819 text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3820 }
3821
3822 fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3823 word.segments().any(|segment| {
3824 matches!(
3825 segment.kind(),
3826 LexedWordSegmentKind::SingleQuoted
3827 | LexedWordSegmentKind::DollarSingleQuoted
3828 | LexedWordSegmentKind::DoubleQuoted
3829 | LexedWordSegmentKind::DollarDoubleQuoted
3830 )
3831 }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3832 }
3833
3834 fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3835 text.contains(['*', '?'])
3836 || text.ends_with('}') && text.contains("${")
3837 || text.ends_with(']')
3838 && text
3839 .rfind('[')
3840 .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3841 }
3842
3843 fn is_word_char(ch: char) -> bool {
3844 !matches!(
3845 ch,
3846 ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3847 )
3848 }
3849
3850 const fn is_ascii_word_byte(byte: u8) -> bool {
3851 !matches!(
3852 byte,
3853 b' ' | b'\t'
3854 | b'\n'
3855 | b';'
3856 | b'|'
3857 | b'&'
3858 | b'>'
3859 | b'<'
3860 | b'('
3861 | b')'
3862 | b'{'
3863 | b'}'
3864 | b'\''
3865 | b'"'
3866 )
3867 }
3868
3869 const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3870 Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3871 }
3872
3873 fn is_plain_word_char(ch: char) -> bool {
3874 Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3875 }
3876
    /// Reads a heredoc body for an already-lexed `<<`/`<<-` operator.
    ///
    /// The heredoc body starts on the NEXT line, but in source order the rest
    /// of the current line (more redirections, a pipeline tail, a comment…)
    /// comes first. This method therefore works in three phases:
    ///
    /// 1. Capture the remainder of the current line into `rest_of_line`,
    ///    tracking quoting/comment state so the terminating newline is found
    ///    correctly.
    /// 2. Consume body lines into `content` until a line matching
    ///    `delimiter` (see `heredoc_line_matches_delimiter`); with
    ///    `strip_tabs` (`<<-`) leading tabs on the terminator are ignored.
    /// 3. Reinject the captured tail (plus a newline) into `reinject_buf` so
    ///    it is lexed next, and record where real-source lexing resumes.
    ///
    /// Returns the cooked content together with its source span.
    pub fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
        let mut content = String::with_capacity(64);
        let mut current_line = String::with_capacity(64);

        // Phase 1: scan to the end of the current line, remembering its text.
        let mut rest_of_line = String::with_capacity(32);
        let rest_of_line_start = self.current_position();
        let mut in_double_quote = false;
        let mut in_single_quote = false;
        let mut in_comment = false;
        let mut saw_non_whitespace_tail = false;
        // Run length of unquoted backslashes; an even run means the next
        // backslash is itself unescaped.
        let mut consecutive_backslashes = 0usize;
        let mut previous_tail_char = None;
        while let Some(ch) = self.peek_char() {
            self.advance();
            // Inside a comment everything up to the newline is tail text.
            if in_comment {
                if ch == '\n' {
                    break;
                }
                rest_of_line.push(ch);
                previous_tail_char = Some(ch);
                continue;
            }
            // An unquoted `#` in comment position opens a tail comment.
            if ch == '#'
                && !in_single_quote
                && !in_double_quote
                && self.comments_enabled()
                && heredoc_tail_hash_starts_comment(previous_tail_char)
            {
                in_comment = true;
                rest_of_line.push(ch);
                previous_tail_char = Some(ch);
                consecutive_backslashes = 0;
                continue;
            }
            // An unescaped `\` + newline joins the next line into the tail,
            // but only when the continuation stays part of this command.
            let backslash_continues_line = ch == '\\'
                && !in_single_quote
                && self.peek_char() == Some('\n')
                && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
                && consecutive_backslashes.is_multiple_of(2);
            if backslash_continues_line {
                rest_of_line.push(ch);
                rest_of_line.push('\n');
                self.advance();
                consecutive_backslashes = 0;
                continue;
            }
            // An unquoted newline terminates the tail; the heredoc body
            // begins right after it.
            if ch == '\n' && !in_double_quote && !in_single_quote {
                break;
            }
            if ch == '"' && !in_single_quote {
                in_double_quote = !in_double_quote;
            } else if ch == '\'' && !in_double_quote {
                in_single_quote = !in_single_quote;
            } else if ch == '\\' && in_double_quote {
                // Inside double quotes a backslash carries its next
                // character verbatim.
                rest_of_line.push(ch);
                if let Some(next) = self.peek_char() {
                    rest_of_line.push(next);
                    self.advance();
                }
                continue;
            }
            rest_of_line.push(ch);
            if !ch.is_whitespace() {
                saw_non_whitespace_tail = true;
            }
            if ch == '\\' && !in_single_quote {
                consecutive_backslashes += 1;
            } else {
                consecutive_backslashes = 0;
            }
            previous_tail_char = Some(ch);
        }

        // Phase 2: consume body lines until the delimiter line.
        self.sync_offset_to_cursor();
        let content_start = self.current_position();
        let mut current_line_start = content_start;
        let content_end;

        loop {
            if self.reinject_buf.is_empty() {
                // Fast path: slice whole lines directly off the cursor.
                self.sync_offset_to_cursor();
                let rest = self.cursor.rest();
                if rest.is_empty() {
                    content_end = self.current_position();
                    break;
                }

                let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
                let line = &rest[..line_len];
                let has_newline = line_len < rest.len();

                if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
                    // The terminator line is consumed but not part of the
                    // content; the span ends before it.
                    content_end = current_line_start;
                    self.consume_source_bytes(line_len);
                    if has_newline {
                        self.consume_ascii_chars(1);
                    }
                    break;
                }

                content.push_str(line);
                self.consume_source_bytes(line_len);

                if has_newline {
                    self.consume_ascii_chars(1);
                    content.push('\n');
                    current_line_start = self.current_position();
                    continue;
                }

                // Unterminated heredoc: everything to EOF is content.
                content_end = self.current_position();
                break;
            }

            // Slow path: characters are coming from the reinjection buffer,
            // so lines must be assembled one character at a time.
            match self.peek_char() {
                Some('\n') => {
                    self.advance();
                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
                        content_end = current_line_start;
                        break;
                    }
                    content.push_str(&current_line);
                    content.push('\n');
                    current_line.clear();
                    current_line_start = self.current_position();
                }
                Some(ch) => {
                    current_line.push(ch);
                    self.advance();
                }
                None => {
                    // EOF: a final unterminated line may still match the
                    // delimiter.
                    if heredoc_line_matches_delimiter(&current_line, delimiter, strip_tabs) {
                        content_end = current_line_start;
                        break;
                    }
                    if !current_line.is_empty() {
                        content.push_str(&current_line);
                    }
                    content_end = self.current_position();
                    break;
                }
            }
        }

        // Phase 3: replay the saved tail via the reinjection buffer. The
        // lexer's offset is rewound to where the tail originally started so
        // reinjected tokens get correct positions, and the post-heredoc
        // offset is stashed so lexing resumes there afterwards.
        let post_heredoc_offset = self.offset;
        self.offset = rest_of_line_start.offset;
        for ch in rest_of_line.chars() {
            self.reinject_buf.push_back(ch);
        }
        self.reinject_buf.push_back('\n');
        self.reinject_resume_offset = Some(post_heredoc_offset);

        HeredocRead {
            content,
            content_span: Span::from_positions(content_start, content_end),
        }
    }
4057
4058 fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4059 let mut chars = self.cursor.rest().chars();
4060 if chars.next() != Some('\n') {
4061 return false;
4062 }
4063
4064 for ch in chars {
4065 if matches!(ch, ' ' | '\t') {
4066 continue;
4067 }
4068 if ch == '\n' {
4069 return false;
4070 }
4071 return matches!(ch, '|' | '&' | ';' | '<' | '>')
4072 || (ch == '#' && self.comments_enabled());
4073 }
4074
4075 false
4076 }
4077}
4078
/// Returns `true` when `line` terminates a heredoc with the given
/// `delimiter`: the delimiter (after optional leading tabs when
/// `strip_tabs`, i.e. `<<-`) followed by nothing but blanks.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    // An exact match is the empty-trailing case of the prefix check.
    candidate
        .strip_prefix(delimiter)
        .is_some_and(|trailing| trailing.chars().all(|ch| ch == ' ' || ch == '\t'))
}
4096
/// A `#` in a heredoc tail opens a comment when it sits at the start of the
/// tail, or directly after whitespace or an operator character.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(prev) => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
    }
}
4102
/// Returns the character at byte `index` together with the index just past
/// it, or `None` when `index` is out of range or not a char boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    // `get` rejects both out-of-range and mid-character indices.
    let tail = input.get(index..)?;
    let ch = tail.chars().next()?;
    Some((ch, index + ch.len_utf8()))
}
4107
/// Returns `true` when `prefix` (the portion of one line before some point)
/// opens a double parenthesis `((` that is not closed by a matching `))`
/// within the prefix.
///
/// `((`/`))` inside single quotes, double quotes, or backticks do not
/// count, and a backslash (outside single quotes) escapes the following
/// character.
fn line_has_unclosed_double_paren(prefix: &str) -> bool {
    let mut index = 0usize;
    // `((` openers seen minus `))` closers seen.
    let mut depth = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_backtick = false;
    let mut escaped = false;

    while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
        // `was_escaped` applies to THIS character; toggling on each
        // backslash makes runs like `\\((` behave correctly.
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            continue;
        }
        escaped = false;

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
            '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
            '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
            // Only an unquoted, unescaped literal `((` opens a level; both
            // parens are consumed together.
            '(' if !in_single
                && !in_double
                && !in_backtick
                && !was_escaped
                && prefix[next_index..].starts_with('(') =>
            {
                depth += 1;
                index = next_index + '('.len_utf8();
                continue;
            }
            // `))` closes one level; saturating_sub tolerates stray closers.
            ')' if !in_single
                && !in_double
                && !in_backtick
                && !was_escaped
                && prefix[next_index..].starts_with(')') =>
            {
                depth = depth.saturating_sub(1);
                index = next_index + ')'.len_utf8();
                continue;
            }
            _ => {}
        }

        index = next_index;
    }

    depth > 0
}
4157
4158fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4159 let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4160 let prefix = &input[line_start..index];
4161 line_has_unclosed_double_paren(prefix)
4162}
4163
4164fn hash_starts_comment(input: &str, index: usize) -> bool {
4165 if inside_unclosed_double_paren_on_line(input, index) {
4166 return false;
4167 }
4168
4169 let next = &input[index + '#'.len_utf8()..];
4170 input[..index]
4171 .chars()
4172 .next_back()
4173 .is_none_or(|prev| match prev {
4174 '(' => {
4175 let whitespace_index = next.find(char::is_whitespace);
4176 let close_index = next.find(')');
4177
4178 match (whitespace_index, close_index) {
4179 (Some(whitespace), Some(close)) => whitespace < close,
4180 (Some(_), None) | (None, None) => true,
4181 (None, Some(_)) => false,
4182 }
4183 }
4184 _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4185 })
4186}
4187
/// `ch` terminates a heredoc delimiter word only when it is unquoted,
/// unescaped, and is whitespace or a shell operator character.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }
    ch.is_whitespace() || matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')')
}
4199
/// Scans forward from `index` (just past an opening `"`) while measuring a
/// command substitution, returning the index one past the closing `"`.
///
/// Backslash escapes skip the next character, and nested `${…}` / `$(…)`
/// constructs are skipped via their dedicated scanners so quotes inside them
/// cannot terminate this segment. Returns `None` when the closing quote (or
/// a nested construct's closer) is never found.
fn scan_double_quoted_command_substitution_segment(
    input: &str,
    mut index: usize,
    subst_depth: usize,
) -> Option<usize> {
    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            '"' => return Some(next_index),
            // Skip the escaped character, if any.
            '\\' => {
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
            }
            // `${…}`: delegate to the parameter-expansion scanner.
            '$' if input[next_index..].starts_with('{') => {
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
            }
            // `$(…)` but not `$((…))` arithmetic: recurse one level deeper.
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
            }
            _ => index = next_index,
        }
    }

    None
}
4236
/// Measures a `${…}` parameter expansion while scanning a command
/// substitution, starting just past the `{`.
///
/// Returns the index one past the matching unquoted `}`, or `None` when it
/// is never found. Tracks single, double, ANSI-C (`$'…'`), and backtick
/// quoting, honors backslash escapes, and skips nested `${…}`, `$(…)`, and
/// `<(…)`/`>(…)` process-substitution forms so their closers don't end this
/// expansion early.
fn scan_command_subst_parameter_expansion_len(input: &str, subst_depth: usize) -> Option<usize> {
    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True when the previous character was an unquoted `$`, so a following
    // `'` opens an ANSI-C quote rather than a plain single quote.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        // `was_escaped` applies to THIS character; backslash runs toggle it.
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // Nested `${…}` / `$(…)` (but not `$((…))`) introduced by `$`.
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // Process substitution `<(…)` / `>(…)` outside all quoting.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    // `$'` begins an ANSI-C quoted segment.
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // An unquoted, unescaped `}` closes this expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4336
/// Parses the heredoc delimiter word starting at `index` while scanning a
/// command substitution.
///
/// Leading blanks are skipped, then characters are consumed up to the first
/// unquoted terminator. Quotes and backslashes are stripped so the returned
/// "cooked" delimiter matches how terminator lines are compared. Returns
/// `(end_index, cooked_delimiter)`, or `None` when no delimiter characters
/// were consumed at all.
fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
    // Skip horizontal whitespace before the delimiter word.
    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        if !matches!(ch, ' ' | '\t') {
            break;
        }
        index = next_index;
    }

    let start = index;
    let mut cooked = String::new();
    let mut in_single = false;
    let mut in_double = false;
    let mut escaped = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
            break;
        }

        index = next_index;
        // An escaped character is taken literally, quotes included.
        if escaped {
            cooked.push(ch);
            escaped = false;
            continue;
        }

        // Quote characters toggle state and are NOT part of the cooked text.
        match ch {
            '\\' if !in_single => escaped = true,
            '\'' if !in_double => in_single = !in_single,
            '"' if !in_single => in_double = !in_double,
            _ => cooked.push(ch),
        }
    }

    // `index == start` means there was no delimiter at all.
    (index > start).then_some((index, cooked))
}
4373
4374fn skip_command_subst_pending_heredoc(
4375 input: &str,
4376 mut index: usize,
4377 delimiter: &str,
4378 strip_tabs: bool,
4379) -> usize {
4380 while index <= input.len() {
4381 let rest = &input[index..];
4382 let line_len = rest.find('\n').unwrap_or(rest.len());
4383 let line = &rest[..line_len];
4384 let has_newline = line_len < rest.len();
4385
4386 index += line_len;
4387 if has_newline {
4388 index += '\n'.len_utf8();
4389 }
4390
4391 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4392 return index;
4393 }
4394 }
4395
4396 index
4397}
4398
4399fn scan_command_subst_ansi_c_single_quoted_segment(
4400 input: &str,
4401 quote_index: usize,
4402) -> Option<usize> {
4403 let mut index = quote_index + '\''.len_utf8();
4404
4405 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4406 index = next_index;
4407 if ch == '\\' {
4408 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4409 index = escaped_next;
4410 }
4411 continue;
4412 }
4413
4414 if ch == '\'' {
4415 return Some(index);
4416 }
4417 }
4418
4419 None
4420}
4421
4422fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4423 let mut index = start;
4424
4425 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4426 index = next_index;
4427 if ch == '\\' {
4428 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4429 index = escaped_next;
4430 }
4431 continue;
4432 }
4433
4434 if ch == '`' {
4435 return Some(index);
4436 }
4437 }
4438
4439 None
4440}
4441
4442fn flush_scanned_command_subst_keyword(
4443 current_word: &mut String,
4444 pending_case_headers: &mut usize,
4445 case_clause_depths: &mut SmallVec<[usize; 4]>,
4446 depth: usize,
4447 word_started_at_command_start: &mut bool,
4448) {
4449 if current_word.is_empty() {
4450 *word_started_at_command_start = false;
4451 return;
4452 }
4453
4454 match current_word.as_str() {
4455 "case" if *word_started_at_command_start => *pending_case_headers += 1,
4456 "in" if *pending_case_headers > 0 => {
4457 *pending_case_headers -= 1;
4458 case_clause_depths.push(depth);
4459 }
4460 "esac" if *word_started_at_command_start => {
4461 case_clause_depths.pop();
4462 }
4463 _ => {}
4464 }
4465
4466 current_word.clear();
4467 *word_started_at_command_start = false;
4468}
4469
/// Core scanner that measures a `$( … )` command-substitution body,
/// starting just past the opening `(`.
///
/// Returns the index one past the matching `)` at depth zero, or `None`
/// when the body is unterminated or nesting exceeds
/// `DEFAULT_MAX_SUBST_DEPTH`.
///
/// The scanner tracks just enough shell structure to avoid treating the
/// wrong `)` as the closer: quoting (single / double / ANSI-C / backtick),
/// comments, pending heredocs (a `)` inside a heredoc body must not close
/// the substitution), and `case … in … esac` clauses, whose
/// pattern-terminating `)` is not a group closer.
fn scan_command_substitution_body_len_inner(input: &str, subst_depth: usize) -> Option<usize> {
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    // Paren nesting; the `(` we started just past counts as level 1.
    let mut depth = 1;
    // Heredocs announced on the current line: (cooked delimiter, strip_tabs).
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    // `case` keywords still waiting for their `in`.
    let mut pending_case_headers = 0usize;
    // Paren depths at which a `case … in` clause list is currently open.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    // Bare word being accumulated, for keyword detection on flush.
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Comment: skip to end of line, then consume any heredoc bodies
            // queued on this line.
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            // Opening paren: one level deeper; a subshell starts a command.
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // Inside an open `case` clause list at this depth, `)` ends
                // a pattern, not a paren group.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                // Depth zero: this `)` closes the substitution itself.
                if depth == 0 {
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            // Double-quoted segment, handled by its dedicated scanner.
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Single-quoted segment: skip verbatim to the closing quote.
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Backtick substitution, handled by its dedicated scanner.
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // ANSI-C quoted segment `$'…'`.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Backslash escape: skip the escaped character.
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Output redirection: the next word is a target, not a command.
            '>' => {
                // A purely numeric word directly before `>` is an fd prefix
                // (e.g. `2>`), so command position is preserved.
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            // `<<`: heredoc, herestring (`<<<`), or `<<-` variant.
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // Inside `((…))` this is a shift operator, not a heredoc.
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<` is a herestring: its target is a plain word.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    // Body is consumed once the line's newline is reached.
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            // Newline: consume any queued heredoc bodies, reset to command
            // position.
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            // `${…}` parameter expansion.
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(…)` (but not `$((…))` arithmetic): recurse.
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Everything else: accumulate keyword-ish characters, or flush
            // and update command/redirection state on separators.
            _ => {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        ' ' | '\t' => {}
                        // Command separators restore command position.
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    // Ran out of input with the substitution still open.
    None
}
4823
/// Measures a `$( … )` command-substitution body starting just past the
/// opening `(`, returning the byte length up to AND including the matching
/// `)`, or `None` when the substitution is unterminated (or nesting exceeds
/// the depth limit).
pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
    scan_command_substitution_body_len_inner(input, 0)
}
4827
4828#[cfg(test)]
4829mod tests {
4830 use super::*;
4831
4832 fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4833 match token.kind {
4834 kind if kind.is_word_like() => token.word_string(),
4835 TokenKind::Comment => token
4836 .span
4837 .slice(source)
4838 .strip_prefix('#')
4839 .map(str::to_string),
4840 TokenKind::Error => token
4841 .error_kind()
4842 .map(LexerErrorKind::message)
4843 .map(str::to_string),
4844 _ => None,
4845 }
4846 }
4847
4848 fn assert_next_token(
4849 lexer: &mut Lexer<'_>,
4850 expected_kind: TokenKind,
4851 expected_text: Option<&str>,
4852 ) {
4853 let token = lexer.next_lexed_token().unwrap();
4854 assert_eq!(token.kind, expected_kind);
4855 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4856 }
4857
4858 fn assert_next_token_with_comments(
4859 lexer: &mut Lexer<'_>,
4860 expected_kind: TokenKind,
4861 expected_text: Option<&str>,
4862 ) {
4863 let token = lexer.next_lexed_token_with_comments().unwrap();
4864 assert_eq!(token.kind, expected_kind);
4865 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4866 }
4867
4868 fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4869 let mut lexer = Lexer::new(input);
4870
4871 while let Some(token) = lexer.next_lexed_token() {
4872 if token.kind == TokenKind::Newline {
4873 continue;
4874 }
4875
4876 assert_eq!(
4877 token.span.start.line, token.span.end.line,
4878 "token should stay on one line: {:?}",
4879 token
4880 );
4881 }
4882 }
4883
4884 #[test]
4885 fn test_simple_words() {
4886 let mut lexer = Lexer::new("echo hello world");
4887
4888 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4889 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
4890 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
4891 assert!(lexer.next_lexed_token().is_none());
4892 }
4893
4894 #[test]
4895 fn test_single_quoted_string() {
4896 let mut lexer = Lexer::new("echo 'hello world'");
4897
4898 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4899 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
4901 assert!(lexer.next_lexed_token().is_none());
4902 }
4903
4904 #[test]
4905 fn test_double_quoted_string() {
4906 let mut lexer = Lexer::new("echo \"hello world\"");
4907
4908 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4909 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
4910 assert!(lexer.next_lexed_token().is_none());
4911 }
4912
4913 #[test]
4914 fn test_brace_expansion_token_ignores_quoted_closers() {
4915 let mut lexer = Lexer::new("echo {\"}\",a}\n");
4916
4917 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4918 assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
4919 assert_next_token(&mut lexer, TokenKind::Newline, None);
4920 assert!(lexer.next_lexed_token().is_none());
4921 }
4922
4923 #[test]
4924 fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
4925 let mut lexer = Lexer::new("echo {'a\\',b} next\n");
4926
4927 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
4928 assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
4929 assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
4930 assert_next_token(&mut lexer, TokenKind::Newline, None);
4931 assert!(lexer.next_lexed_token().is_none());
4932 }
4933
    // The double-quoted `$bar` segment keeps a span that slices back into the
    // original source text.
    #[test]
    fn test_double_quoted_expansion_token_keeps_source_backing() {
        let source = r#""$bar""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(token.word_text(), Some("$bar"));

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.span().unwrap().slice(source), "$bar");
    }

    // A `$( … )` containing nested double quotes and a pipe stays one QuotedWord.
    #[test]
    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
        );
    }

    // A `$( … )` containing a braced `${@}` expansion and a pipe stays one QuotedWord.
    #[test]
    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
        );
    }
4974
    // Adjacent plain / double-quoted / single-quoted pieces form one Word whose
    // segments keep their individual kinds, joined text, and source spans.
    #[test]
    fn test_mixed_word_keeps_segment_kinds() {
        let source = r#"foo"bar"'baz'"#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::Plain, "foo".to_string()),
                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobarbaz");
        assert_eq!(
            word.segments()
                .next()
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "foo"
        );
    }
5007
    // A tab-indented `<<-EOF` heredoc (whose body contains `)` and `,`) must be
    // skipped before the scanner matches the closing paren.
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // A comment starting directly after `;` hides its `)` and `,` from the scanner.
    #[test]
    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf '%s' y"));
        assert!(body.ends_with(')'));
    }

    // A comment opened right after a grouping `(` must not close the group early.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
        let source = " (# comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // `<<EOF|` with no space before the pipe is still detected as a heredoc.
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // A `)` inside the replacement of `${x//foo/)}` must not end the substitution.
    #[test]
    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
        let source = "printf %s ${x//foo/)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x//foo/)},1"));
        assert!(body.ends_with(')'));
    }

    // A comment after a case-arm `)` is skipped, and the arm's `)` does not close
    // the substitution body.
    #[test]
    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5073
    // `(#b)` right after `(` is a zsh inline glob flag, not a comment.
    #[test]
    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
        let source = "[[ \"$buf\" == (#b)(*) ]]";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // `#comment` with no space after the hash still starts a comment inside a group.
    #[test]
    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
        let source = "(#comment with )";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }

    // `#c` inside `(( ))` is a zsh arithmetic character literal, not a comment.
    #[test]
    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
        let source = "(( #c < 256 ))";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // A quoted `'(('` does not open arithmetic, so the later `#` is a real comment.
    #[test]
    fn test_hash_starts_comment_respects_quoted_double_parens() {
        let source = "printf '((' # comment";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }
5105
    // A single-quoted `'(('` before a comment must not be mistaken for arithmetic,
    // so the comment's `)` stays hidden.
    #[test]
    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // A grouped `(#comment` (no space after the hash) still hides its `)`.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
        let source = " (#comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // `<<` inside `(( ))` is an arithmetic shift, not a heredoc opener.
    #[test]
    fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
        let source = "((x<<2))\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // A case-arm `)` inside a nested `( … )` group must not close the group.
    #[test]
    fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
        let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with("))"));
    }

    // `case` / `in` used as plain command arguments must not open case-statement mode.
    #[test]
    fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
        let source = "printf %s 1,2; echo case in)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("echo case in"));
        assert!(body.ends_with(')'));
    }

    // `\'` inside an ANSI-C `$'…'` string must not terminate the quote.
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
        let source = "printf %s $'a\\'b'; printf %s 1,2)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("$'a\\'b'"));
        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // A `)` inside backticks must not close the surrounding substitution.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
        let source = "printf %s `echo foo)`; printf %s ok)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("`echo foo)`"));
        assert!(body.contains("printf %s ok"));
        assert!(body.ends_with(')'));
    }

    // Backticks nested inside `${…}` (even containing `}` and `)`) stay part of
    // the parameter expansion.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
        let source = "printf %s ${x/`echo }`/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/`echo }`/foo)},1"));
        assert!(body.ends_with(')'));
    }

    // A `<( … )` process substitution nested inside `${…}` stays part of the expansion.
    #[test]
    fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
    {
        let source = "printf %s ${x/<(echo })/foo)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x/<(echo })/foo)},1"));
        assert!(body.ends_with(')'));
    }
5207
    // EOF variants: when the closing `)` is the last byte of input, the scanner
    // must consume the entire source. Same scenario as the non-EOF twin above,
    // minus the trailing `"` context.
    #[test]
    fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
        let source = "printf %s 1,2; echo case in)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // ANSI-C `$'…\'…'` quoting with the closing `)` at end of input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
        let source = "printf %s $'a\\'b'; printf %s 1,2)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Backticks containing `)` with the closing `)` at end of input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
        let source = "printf %s `echo foo)`; printf %s ok)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // Mixed double/single quotes in a pipeline with the closing `)` at end of input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
        let source = "echo \"$line\" | cut -d' ' -f2-)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // A braced `${@}` expansion in a pipeline with the closing `)` at end of input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
        let source = "echo \"${@}\" | tr -d '[:space:]')";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // A tab-indented `<<-EOF` heredoc with the closing `)` at end of input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }

    // A piped heredoc (`<<EOF|` without a space) with the closing `)` at end of input.
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
        let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert_eq!(body, source);
    }
5277
    // A quoted `)` produced by `$(printf ')')`, nested inside `$(( ))` which is
    // itself inside `$( )`, must not close any enclosing substitution level.
    #[test]
    fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
        let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
        let mut lexer = Lexer::new(source);

        let first = lexer.next_lexed_token().expect("expected first token");
        assert!(first.kind.is_word_like(), "{:?}", first.kind);
        assert_eq!(first.word_string().as_deref(), Some("echo"));

        let second = lexer.next_lexed_token().expect("expected second token");
        assert!(second.kind.is_word_like(), "{:?}", second.kind);
        assert_eq!(
            second.word_string().as_deref(),
            Some("$(echo \"$(( $(printf ')') + 1 ))\")")
        );
    }
5294
    // Escaped `\"` before the `$(` must not flip the scanner's quote state: the
    // body ends at the first unquoted `)` after the substitution opens.
    #[test]
    fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
        let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
    }

    // A nested `$( … )` keeps its own closing paren inside the consumed body; only
    // the final `)` terminates the outer substitution.
    #[test]
    fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
        let source = "echo $(echo $(basename $filename .fuzz))";
        let start = source.find("$(").expect("expected command substitution") + 2;
        let consumed =
            scan_command_substitution_body_len(&source[start..]).expect("expected match");
        assert_eq!(
            &source[start..start + consumed],
            "echo $(basename $filename .fuzz))"
        );
    }

    // An `&&`-joined conditional with a quoted nested `$(basename …)` consumes the
    // whole input up to and including the final `)`.
    #[test]
    fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
        let source = "\n [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n )";
        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        assert_eq!(consumed, source.len());
    }
5322
    // `'foo'bar`: a single-quoted prefix plus a bare continuation keep distinct
    // segment kinds, a joined text, and a source span for the plain continuation.
    #[test]
    fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
        let source = "'foo'bar";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::LiteralWord);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
                (LexedWordSegmentKind::Plain, "bar".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobar");
        assert_eq!(
            word.segments()
                .nth(1)
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "bar"
        );
    }
5354
    // An unquoted `$( … )` word stays a single Plain segment whose span slices
    // back to the original source.
    #[test]
    fn test_unquoted_command_substitution_word_keeps_source_backing() {
        let source = "$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }

    // An unquoted `${…}` with a nested `${#…}` inside its subscript keeps its
    // single Plain segment and source span.
    #[test]
    fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
        let source = "${arr[$RANDOM % ${#arr[@]}]}";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(segment.as_str(), source);
        assert_eq!(segment.span().unwrap().slice(source), source);
    }

    // `"foo"$( … )`: the plain continuation after a quoted prefix keeps its own
    // span into the source.
    #[test]
    fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
        let source = "\"foo\"$(printf hi)";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let continuation = word.segments().nth(1).unwrap();
        assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
        assert_eq!(continuation.as_str(), "$(printf hi)");
        assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
    }

    // The double-quoted form of the nested `${…}` expansion also keeps a single
    // segment with a source-backed span (minus the surrounding quotes).
    #[test]
    fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
        let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
        assert_eq!(
            segment.span().unwrap().slice(source),
            "${arr[$RANDOM % ${#arr[@]}]}"
        );
    }
5417
    // In `$'\c''` the `\c` escape consumes the following character — here the
    // quote itself — producing control-`'` (0x27 & 0x1f = 0x07, BEL), while the
    // final `'` still closes the string.
    #[test]
    fn test_ansi_c_control_escape_can_consume_quote() {
        let mut lexer = Lexer::new("echo $'\\c''");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
        assert!(lexer.next_lexed_token().is_none());
    }
5426
    // A `${…//'"'/'\"'}` replacement embedded in double quotes stays a single
    // Word token on its own line.
    #[test]
    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
        let source = r#"out_line="${out_line//'"'/'\"'}"
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // The same replacement must not swallow the commands and heredoc that follow.
    #[test]
    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
        let source = r#"out_line="${out_line//'"'/'\"'}"
echo "Error: Missing python3!"
cat << 'EOF' > "${pywrapper}"
import os
EOF
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("Error: Missing python3!"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
    }

    // `${crypt//\\/\\\\}` stays one Word; the cooked word text differs from the
    // raw span, so `source_slice` is unavailable (None).
    #[test]
    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
        assert!(token.source_slice(source).is_none());
        assert_eq!(
            token.word_string().as_deref(),
            Some("crypt=${crypt//\\/\\\\}")
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5488
    // A literal escaped `{` inside a `${response#*{…}` trim pattern must not make
    // the lexer swallow the following `fi`, `}`, and newlines.
    #[test]
    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n if true; then\n txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n fi\n}\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(
                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
            ),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5525
    // `{)` as a case pattern: the `{` lexes as an ordinary Word and the `)` still
    // closes the arm, so later arms and `esac` are reached.
    #[test]
    fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
        let source = "case \"$word\" in\n {) : ;;\n :) : ;;\nesac\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
        assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5549
    // In `[[ … =~ ^{ ]]` the `{` is part of the regex word; the closing `]]`,
    // `;`, and `then` must still lex as their own tokens.
    #[test]
    fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
        let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
        assert_next_token(&mut lexer, TokenKind::And, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5571
    // A brace expansion in the middle of a word whose member is a `$( … )`
    // substitution stays one Word token.
    #[test]
    fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
        let source = "echo -{$(echo a),b}-\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }

    // Same shape with an arithmetic `$(( … ))` member.
    #[test]
    fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
        let source = "echo -{$((1 + 2)),b}-\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5593
5594 #[test]
5595 fn test_operators() {
5596 let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5597
5598 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5599 assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5600 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5601 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5602 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5603 assert_next_token(&mut lexer, TokenKind::And, None);
5604 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5605 assert_next_token(&mut lexer, TokenKind::Or, None);
5606 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5607 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5608 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5609 assert_next_token(&mut lexer, TokenKind::Background, None);
5610 assert!(lexer.next_lexed_token().is_none());
5611 }
5612
    // `[[` only opens a conditional when followed by a separator; `[[z]` lexes as
    // a plain Word.
    #[test]
    fn test_double_left_bracket_requires_separator() {
        let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");

        assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
        assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5625
    // Every redirect operator form maps to its token kind; `>>|` collapses to
    // RedirectAppend, and fd-prefixed forms carry the fd on the token.
    #[test]
    fn test_redirects() {
        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");

        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
        // `2>|` lexes as Clobber carrying fd 2; the operator itself has no word text.
        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Clobber);
        assert_eq!(token.fd_value(), Some(2));
        assert_eq!(token_text(&token, lexer.input), None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
        assert_next_token(&mut lexer, TokenKind::HereString, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
    }
5655
    // `next_lexed_token` skips comments entirely; the newline after a comment is
    // still emitted.
    #[test]
    fn test_comment() {
        let mut lexer = Lexer::new("echo hello # this is a comment\necho world");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
    }

    // `next_lexed_token_with_comments` yields Comment tokens whose text excludes
    // the `#` and whose spans use 1-based line/column positions.
    #[test]
    fn test_comment_token_with_span() {
        let mut lexer = Lexer::new("# lead\necho hi # tail");

        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
        assert_eq!(comment.span.start.line, 1);
        assert_eq!(comment.span.start.column, 1);
        assert_eq!(comment.span.end.line, 1);
        assert_eq!(comment.span.end.column, 7);

        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));

        let inline = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(inline.kind, TokenKind::Comment);
        assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
        assert_eq!(inline.span.start.line, 2);
        assert_eq!(inline.span.start.column, 9);
    }
5689
    // `#` starts a comment only at a word boundary and outside quotes/expansions:
    // mid-word, inside `${x#y}`, and inside quotes it is literal text.
    #[test]
    fn test_comment_token_preserves_hash_boundaries() {
        let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
        assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
        assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
        assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
        assert!(lexer.next_lexed_token_with_comments().is_none());
    }
5702
5703 #[test]
5704 fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5705 let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5706
5707 let mut saw_comment = false;
5708 while let Some(token) = lexer.next_lexed_token_with_comments() {
5709 if token.kind == TokenKind::Comment {
5710 saw_comment = true;
5711 break;
5712 }
5713 }
5714
5715 assert!(
5716 !saw_comment,
5717 "zsh inline glob controls inside [[ ]] should not lex as comments"
5718 );
5719 }
5720
5721 #[test]
5722 fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5723 let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5724
5725 let mut saw_comment = false;
5726 while let Some(token) = lexer.next_lexed_token_with_comments() {
5727 if token.kind == TokenKind::Comment {
5728 saw_comment = true;
5729 break;
5730 }
5731 }
5732
5733 assert!(
5734 !saw_comment,
5735 "zsh arithmetic char literals inside (( )) should not lex as comments"
5736 );
5737 }
5738
    // A zsh `${1//(#m)[…]/…}` replacement containing embedded quotes stays one
    // QuotedWord token.
    #[test]
    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
        let mut lexer = Lexer::new(
            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }

    // The same replacement word inside an anonymous `() { … }` body must not eat
    // the function's closing `}` or its argument.
    #[test]
    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
        let mut lexer = Lexer::new(
            "() {\n builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5787
5788 #[test]
5789 fn test_variable_words() {
5790 let mut lexer = Lexer::new("echo $HOME $USER");
5791
5792 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5793 assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5794 assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5795 assert!(lexer.next_lexed_token().is_none());
5796 }
5797
5798 #[test]
5799 fn test_pipeline_tokens() {
5800 let mut lexer = Lexer::new("echo hello | cat");
5801
5802 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5803 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5804 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5805 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5806 assert!(lexer.next_lexed_token().is_none());
5807 }
5808
5809 #[test]
5810 fn test_read_heredoc() {
5811 let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5813 let content = lexer.read_heredoc("EOF", false);
5814 assert_eq!(content.content, "hello\nworld\n");
5815 }
5816
5817 #[test]
5818 fn test_read_heredoc_single_line() {
5819 let mut lexer = Lexer::new("\ntest\nEOF");
5820 let content = lexer.read_heredoc("EOF", false);
5821 assert_eq!(content.content, "test\n");
5822 }
5823
5824 #[test]
5825 fn test_read_heredoc_full_scenario() {
5826 let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5828
5829 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5831 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5832 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5833
5834 let content = lexer.read_heredoc("EOF", false);
5836 assert_eq!(content.content, "hello\nworld\n");
5837 }
5838
5839 #[test]
5840 fn test_read_heredoc_with_redirect() {
5841 let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5843 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5844 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5845 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5846 let content = lexer.read_heredoc("EOF", false);
5847 assert_eq!(content.content, "hello\n");
5848 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5850 assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5851 }
5852
5853 #[test]
5854 fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5855 let source = "cat <<EOF | grep hello \\\n | sort \\\n > out.txt\nhello\nEOF\n";
5856 let mut lexer = Lexer::new(source);
5857
5858 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5859 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5860 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5861
5862 let heredoc = lexer.read_heredoc("EOF", false);
5863 assert_eq!(heredoc.content, "hello\n");
5864
5865 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5866 assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
5867 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5868 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5869 assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
5870 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5871 assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
5872 }
5873
5874 #[test]
5875 fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
5876 let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
5877 let mut lexer = Lexer::new(source);
5878
5879 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5880 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5881 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5882
5883 let heredoc = lexer.read_heredoc("EOF", false);
5884 assert_eq!(heredoc.content, "1\n2\n3\n");
5885 }
5886
5887 #[test]
5888 fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
5889 let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
5890 let mut lexer = Lexer::new(source);
5891
5892 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5893 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5894 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5895
5896 let heredoc = lexer.read_heredoc("EOF", false);
5897 assert_eq!(heredoc.content, "body\n");
5898 }
5899
5900 #[test]
5901 fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
5902 let source = "cat <<EOF # note \\\nbody\nEOF\n";
5903 let mut lexer = Lexer::new(source);
5904
5905 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5906 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5907 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5908
5909 let heredoc = lexer.read_heredoc("EOF", false);
5910 assert_eq!(heredoc.content, "body\n");
5911 }
5912
5913 #[test]
5914 fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
5915 let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
5916 let mut lexer = Lexer::new(source);
5917
5918 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5919 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5920 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5921 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5922
5923 let heredoc = lexer.read_heredoc("EOF", false);
5924 assert_eq!(heredoc.content, "body\n");
5925
5926 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5927 }
5928
5929 #[test]
5930 fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
5931 let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
5932 let mut lexer = Lexer::new(source);
5933
5934 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5935 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5936 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5937
5938 let heredoc = lexer.read_heredoc("EOF", false);
5939 assert_eq!(heredoc.content, "1\n");
5940
5941 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5942 assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
5943 }
5944
    #[test]
    fn test_read_heredoc_with_redirect_preserves_following_spans() {
        // After consuming a heredoc body, the tokens that follow (redirect,
        // target, newline, comment) must still carry spans that slice the
        // original source text correctly.
        let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "hello\n");

        // `>` redirect: kind plus exact source span.
        let redirect = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(redirect.kind, TokenKind::RedirectOut);
        assert_eq!(redirect.span.slice(source), ">");

        // Redirect target: text and span both point at `file.txt`.
        let target = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(target.kind, TokenKind::Word);
        assert_eq!(
            token_text(&target, lexer.input).as_deref(),
            Some("file.txt")
        );
        assert_eq!(target.span.slice(source), "file.txt");

        let newline = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(newline.kind, TokenKind::Newline);
        assert_eq!(newline.span.slice(source), "\n");

        // Trailing comment keeps its span into the original source.
        let comment = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(comment.kind, TokenKind::Comment);
        assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
        assert_eq!(comment.span.slice(source), "# done");
    }
5978
5979 #[test]
5980 fn test_comment_with_unicode() {
5981 let source = "# café résumé\necho ok";
5983 let mut lexer = Lexer::new(source);
5984
5985 let comment = lexer.next_lexed_token_with_comments().unwrap();
5986 assert_eq!(comment.kind, TokenKind::Comment);
5987 assert_eq!(
5988 token_text(&comment, lexer.input).as_deref(),
5989 Some(" café résumé")
5990 );
5991 let start = comment.span.start.offset;
5993 let end = comment.span.end.offset;
5994 assert_eq!(start, 0);
5995 assert_eq!(&source[start..end], "# café résumé");
5996 assert!(source.is_char_boundary(start));
5997 assert!(source.is_char_boundary(end));
5998
5999 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6000 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
6001 }
6002
6003 #[test]
6004 fn test_comment_with_cjk_characters() {
6005 let source = "# 你好世界\necho ok";
6007 let mut lexer = Lexer::new(source);
6008
6009 let comment = lexer.next_lexed_token_with_comments().unwrap();
6010 assert_eq!(comment.kind, TokenKind::Comment);
6011 assert_eq!(
6012 token_text(&comment, lexer.input).as_deref(),
6013 Some(" 你好世界")
6014 );
6015 let start = comment.span.start.offset;
6016 let end = comment.span.end.offset;
6017 assert_eq!(&source[start..end], "# 你好世界");
6018 assert!(source.is_char_boundary(start));
6019 assert!(source.is_char_boundary(end));
6020 }
6021
6022 #[test]
6023 fn test_heredoc_with_comments_inside() {
6024 let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6026 let mut lexer = Lexer::new(source);
6027
6028 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6029 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6030 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6031
6032 let heredoc = lexer.read_heredoc("EOF", false);
6033 assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6034
6035 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6038 let comment = lexer.next_lexed_token_with_comments().unwrap();
6039 assert_eq!(comment.kind, TokenKind::Comment);
6040 assert_eq!(
6041 token_text(&comment, lexer.input).as_deref(),
6042 Some(" real comment")
6043 );
6044 }
6045
6046 #[test]
6047 fn test_heredoc_with_hash_in_variable() {
6048 let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6050 let mut lexer = Lexer::new(source);
6051
6052 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6053 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6054 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6055
6056 let heredoc = lexer.read_heredoc("EOF", false);
6057 assert_eq!(heredoc.content, "val=${x#prefix}\n");
6058 }
6059
    #[test]
    fn test_heredoc_span_does_not_leak() {
        // The heredoc content span must stay inside the source and slice back
        // to exactly the body; tokens after the body lex normally.
        let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        let start = heredoc.content_span.start.offset;
        let end = heredoc.content_span.end.offset;
        assert!(
            end <= source.len(),
            "heredoc span end ({end}) exceeds source length ({})",
            source.len()
        );
        assert_eq!(&source[start..end], "hello\nworld\n");

        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
    }
6086
    #[test]
    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
        // Regression modeled on autoconf output: a quoted heredoc (`<<\_ACEOF`)
        // whose body contains an unbalanced backtick must not corrupt the
        // spans or segment boundaries of backtick words on later lines.
        let source = "\
cat <<\\_ACEOF
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
_ACEOF
ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
";
        let mut lexer = Lexer::new(source);

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
        // The backslash-quoted delimiter keeps its original span.
        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(delimiter.kind, TokenKind::Word);
        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");

        let heredoc = lexer.read_heredoc("_ACEOF", false);
        assert_eq!(
            heredoc.content,
            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // First assignment line: span must cover the whole backtick word.
        let first = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(first.kind, TokenKind::Word);
        assert_eq!(
            first.span.slice(source),
            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
        );
        // Each segment's text must agree with what its span slices out.
        let first_segments = first
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            first_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_dir_suffix=/".to_string(),
                    Some("ac_dir_suffix=/".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
                ),
            ]
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // Second assignment line: same checks on the following word.
        let second = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(second.kind, TokenKind::Word);
        assert_eq!(
            second.span.slice(source),
            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
        );
        let second_segments = second
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            second_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_top_builddir_sub=".to_string(),
                    Some("ac_top_builddir_sub=".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
                    Some(
                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
                            .to_string(),
                    ),
                ),
            ]
        );
    }
6186
    #[test]
    fn test_heredoc_with_unicode_content() {
        // Multi-byte heredoc content: span offsets must land on char
        // boundaries and slice back to the exact body text.
        let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));

        let heredoc = lexer.read_heredoc("EOF", false);
        assert_eq!(heredoc.content, "# 你好\ncafé\n");
        let start = heredoc.content_span.start.offset;
        let end = heredoc.content_span.end.offset;
        assert!(
            source.is_char_boundary(start),
            "heredoc span start ({start}) not on char boundary"
        );
        assert!(
            source.is_char_boundary(end),
            "heredoc span end ({end}) not on char boundary"
        );
        assert_eq!(&source[start..end], "# 你好\ncafé\n");
    }
6211
6212 #[test]
6213 fn test_assoc_compound_assignment() {
6214 let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6217 assert_next_token(
6218 &mut lexer,
6219 TokenKind::Word,
6220 Some(r#"m=([foo]="bar" [baz]="qux")"#),
6221 );
6222 assert!(lexer.next_lexed_token().is_none());
6223 }
6224
6225 #[test]
6226 fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6227 let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6228 let mut lexer = Lexer::new(source);
6229
6230 let token = lexer.next_lexed_token().unwrap();
6231 assert_eq!(token.kind, TokenKind::Word);
6232 assert_eq!(token.span.slice(source), source);
6233 assert!(lexer.next_lexed_token().is_none());
6234 }
6235
6236 #[test]
6237 fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6238 let source = r#"foo\_bar@(baz|qux)"#;
6239 let mut lexer = Lexer::new(source);
6240
6241 let token = lexer.next_lexed_token().unwrap();
6242 assert_eq!(token.kind, TokenKind::Word);
6243 assert_eq!(token.span.slice(source), source);
6244 assert!(lexer.next_lexed_token().is_none());
6245 }
6246
6247 #[test]
6248 fn test_indexed_array_not_collapsed() {
6249 let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6252 assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6253 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6254 }
6255
    #[test]
    fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
        // `"$plugin_dir"/*(:t)` — quoted prefix, glob, and a zsh `(:t)`
        // qualifier must lex as a single word with three segments.
        let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);

        // Segment breakdown: quoted part, glob part, qualifier part.
        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();
        assert_eq!(
            segments,
            vec![
                (
                    LexedWordSegmentKind::DoubleQuoted,
                    "$plugin_dir".to_string()
                ),
                (LexedWordSegmentKind::Plain, "/*".to_string()),
                (LexedWordSegmentKind::Plain, "(:t)".to_string()),
            ]
        );

        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert!(lexer.next_lexed_token().is_none());
    }
6288
    #[test]
    fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
        // A zsh glob qualifier `(Nm-1)` directly after a quoted variable must
        // stay part of the same word rather than opening a subshell.
        let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);

        // Segment breakdown: quoted variable then the qualifier.
        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();
        assert_eq!(
            segments,
            vec![
                (
                    LexedWordSegmentKind::DoubleQuoted,
                    "$__GREP_CACHE_FILE".to_string()
                ),
                (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
            ]
        );

        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert!(lexer.next_lexed_token().is_none());
    }
6320
6321 #[test]
6322 fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6323 let source = r#"$dir/${~pats}(N)"#;
6324 let mut lexer = Lexer::new(source);
6325
6326 let token = lexer.next_lexed_token().unwrap();
6327 assert_eq!(token.kind, TokenKind::Word);
6328 assert_eq!(token.span.slice(source), source);
6329 assert!(lexer.next_lexed_token().is_none());
6330 }
6331
6332 #[test]
6333 fn test_dollar_word_does_not_absorb_function_parens() {
6334 let mut lexer = Lexer::new(r#"foo$x()"#);
6335
6336 assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6337 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6338 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6339 assert!(lexer.next_lexed_token().is_none());
6340 }
6341
6342 #[test]
6343 fn test_command_substitution_word_does_not_absorb_function_parens() {
6344 let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6345
6346 assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6347 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6348 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6349 assert!(lexer.next_lexed_token().is_none());
6350 }
6351
6352 #[test]
6355 fn test_digit_at_eof_no_panic() {
6356 let mut lexer = Lexer::new("2");
6358 let token = lexer.next_lexed_token();
6359 assert!(token.is_some());
6360 }
6361
6362 #[test]
6364 fn test_nested_brace_expansion_single_token() {
6365 let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6367 assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6368 assert!(lexer.next_lexed_token().is_none());
6370 }
6371
6372 #[test]
6374 fn test_simple_brace_expansion_unchanged() {
6375 let mut lexer = Lexer::new("${foo}");
6376 assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6377 assert!(lexer.next_lexed_token().is_none());
6378 }
6379
6380 #[test]
6381 fn test_nvm_fixture_lexes_without_stalling() {
6382 let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6383 let mut lexer = Lexer::new(input);
6384 let mut tokens = 0usize;
6385
6386 while lexer.next_lexed_token().is_some() {
6387 tokens += 1;
6388 assert!(
6389 tokens < 100_000,
6390 "lexer should continue making progress on the nvm fixture"
6391 );
6392 }
6393
6394 assert!(tokens > 0, "nvm fixture should produce at least one token");
6395 }
6396
    #[test]
    fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
        // Case arms whose replacement text contains quoted spaces and escaped
        // quotes must not drag tokens across line boundaries.
        let input = concat!(
            "case \"${_input_type:-}\" in\n",
            " html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
            " org) _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
            "esac\n",
        );

        assert_non_newline_tokens_stay_on_one_line(input);

        // The arm terminators (`;;`) and closing `esac` must still be emitted.
        let mut lexer = Lexer::new(input);
        let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
            .map(|token| (token.kind, token_text(&token, input)))
            .collect::<Vec<_>>();
        assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
        assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
    }
6415
6416 #[test]
6417 fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6418 let input = concat!(
6419 "case $2 in\n",
6420 " cygwin*) bin='cygwin32/bin' ;|\n",
6421 "esac\n",
6422 );
6423
6424 let mut lexer = Lexer::new(input);
6425 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6426 .map(|token| (token.kind, token_text(&token, input)))
6427 .collect::<Vec<_>>();
6428
6429 assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6430 assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6431 assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6432 }
6433
6434 #[test]
6435 fn test_inline_if_with_array_append_stays_line_local() {
6436 let input = concat!(
6437 "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6438 "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6439 );
6440
6441 assert_non_newline_tokens_stay_on_one_line(input);
6442 }
6443
6444 #[test]
6445 fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6446 let source = "unsetopt interactive_comments\n#literal\n";
6447 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6448 let mut lexer = Lexer::with_profile(source, &profile);
6449
6450 assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6451 assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6452 assert_next_token(&mut lexer, TokenKind::Newline, None);
6453 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6454 }
6455
6456 #[test]
6457 fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6458 let source = "setopt rc_quotes\nprint 'a''b'\n";
6459 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6460 let mut lexer = Lexer::with_profile(source, &profile);
6461
6462 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6463 assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6464 assert_next_token(&mut lexer, TokenKind::Newline, None);
6465 assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6466 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6467 }
6468
6469 #[test]
6470 fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6471 let source = "setopt ignore_braces\n{ echo }\n";
6472 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6473 let mut lexer = Lexer::with_profile(source, &profile);
6474
6475 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6476 assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6477 assert_next_token(&mut lexer, TokenKind::Newline, None);
6478 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6479 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6480 assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6481 }
6482
    #[test]
    fn test_heredoc_in_arithmetic_fuzz_crash() {
        // Regression for a fuzzer-found crash: heredoc operators nested inside
        // arithmetic `(( ))` previously broke the parser. The bytes are the
        // raw fuzz artifact (valid UTF-8); the test only checks we survive.
        let data: &[u8] = &[
            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
        ];
        let input = std::str::from_utf8(data).unwrap();
        // Wrap the payload in an arithmetic expansion and parse: must not panic.
        let script = format!("echo $(({input}))\n");
        let _ = crate::parser::Parser::new(&script).parse();
    }
6514}