1use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact bit set of per-token attributes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit set when the token's text was rewritten and no longer matches the
    /// raw source bytes.
    const COOKED_TEXT: u8 = 1 << 0;
    /// Bit set when the token was injected by the lexer rather than read
    /// directly from the input.
    const SYNTHETIC: u8 = 1 << 1;

    /// Tests whether every bit in `mask` is present.
    const fn contains(self, mask: u8) -> bool {
        self.0 & mask != 0
    }

    /// No flags set.
    const fn empty() -> Self {
        Self(0)
    }

    /// Only the cooked-text flag set.
    const fn cooked_text() -> Self {
        Self(Self::COOKED_TEXT)
    }

    /// Copy of `self` with the synthetic flag added.
    pub(crate) const fn with_synthetic(self) -> Self {
        Self(self.0 | Self::SYNTHETIC)
    }

    /// True when the cooked-text flag is present.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.contains(Self::COOKED_TEXT)
    }

    /// True when the synthetic flag is present.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.contains(Self::SYNTHETIC)
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43 Borrowed(&'a str),
44 Shared {
45 source: Arc<str>,
46 range: Range<usize>,
47 },
48 Owned(String),
49}
50
51impl TokenText<'_> {
52 pub(crate) fn as_str(&self) -> &str {
53 match self {
54 Self::Borrowed(text) => text,
55 Self::Shared { source, range } => &source[range.clone()],
56 Self::Owned(text) => text,
57 }
58 }
59
60 fn into_owned<'a>(self) -> TokenText<'a> {
61 match self {
62 Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63 Self::Shared { source, range } => TokenText::Shared { source, range },
64 Self::Owned(text) => TokenText::Owned(text),
65 }
66 }
67
68 fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69 match self {
70 Self::Borrowed(text) => span
71 .filter(|span| span.end.offset <= source.len())
72 .map_or_else(
73 || TokenText::Owned(text.to_string()),
74 |span| TokenText::Shared {
75 source: Arc::clone(source),
76 range: span.start.offset..span.end.offset,
77 },
78 ),
79 Self::Shared { source, range } => TokenText::Shared { source, range },
80 Self::Owned(text) => TokenText::Owned(text),
81 }
82 }
83}
84
/// Classifies how a word segment was quoted in the source.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexedWordSegmentKind {
    /// Unquoted text.
    Plain,
    /// Text from a `'...'` region.
    SingleQuoted,
    /// Text from a `$'...'` region.
    DollarSingleQuoted,
    /// Text from a `"..."` region.
    DoubleQuoted,
    /// Text from a `$"..."` region.
    DollarDoubleQuoted,
    /// Mixed or other quoting collapsed into one segment.
    Composite,
}
101
/// One quoting-delimited piece of a lexed word.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWordSegment<'a> {
    // Quoting style of this segment.
    kind: LexedWordSegmentKind,
    // Segment text (borrowed, shared, or owned).
    text: TokenText<'a>,
    // Span of the segment's text content, when known.
    span: Option<Span>,
    // Span including surrounding quote characters; accessor falls back to `span`.
    wrapper_span: Option<Span>,
}
110
111impl<'a> LexedWordSegment<'a> {
112 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113 Self {
114 kind,
115 text: TokenText::Borrowed(text),
116 span,
117 wrapper_span: span,
118 }
119 }
120
121 fn borrowed_with_spans(
122 kind: LexedWordSegmentKind,
123 text: &'a str,
124 span: Option<Span>,
125 wrapper_span: Option<Span>,
126 ) -> Self {
127 Self {
128 kind,
129 text: TokenText::Borrowed(text),
130 span,
131 wrapper_span,
132 }
133 }
134
135 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136 Self {
137 kind,
138 text: TokenText::Owned(text),
139 span: None,
140 wrapper_span: None,
141 }
142 }
143
144 fn owned_with_spans(
145 kind: LexedWordSegmentKind,
146 text: String,
147 span: Option<Span>,
148 wrapper_span: Option<Span>,
149 ) -> Self {
150 Self {
151 kind,
152 text: TokenText::Owned(text),
153 span,
154 wrapper_span,
155 }
156 }
157
158 pub fn as_str(&self) -> &str {
160 self.text.as_str()
161 }
162
163 pub(crate) const fn text_is_source_backed(&self) -> bool {
164 matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165 }
166
167 pub const fn kind(&self) -> LexedWordSegmentKind {
169 self.kind
170 }
171
172 pub const fn span(&self) -> Option<Span> {
174 self.span
175 }
176
177 pub fn wrapper_span(&self) -> Option<Span> {
179 self.wrapper_span.or(self.span)
180 }
181
182 fn rebased(mut self, base: Position) -> Self {
183 self.span = self.span.map(|span| span.rebased(base));
184 self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185 self
186 }
187
188 fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189 LexedWordSegment {
190 kind: self.kind,
191 text: self.text.into_owned(),
192 span: self.span,
193 wrapper_span: self.wrapper_span,
194 }
195 }
196
197 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198 LexedWordSegment {
199 kind: self.kind,
200 text: self.text.into_shared(source, self.span),
201 span: self.span,
202 wrapper_span: self.wrapper_span,
203 }
204 }
205}
206
/// A lexed word: one primary segment plus any directly adjoining segments.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedWord<'a> {
    // First segment of the word.
    primary_segment: LexedWordSegment<'a>,
    // Further segments concatenated after the primary (mixed quoting).
    trailing_segments: Vec<LexedWordSegment<'a>>,
}
213
214impl<'a> LexedWord<'a> {
215 fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216 Self {
217 primary_segment,
218 trailing_segments: Vec::new(),
219 }
220 }
221
222 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223 Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224 }
225
226 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227 Self::from_segment(LexedWordSegment::owned(kind, text))
228 }
229
230 fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231 self.trailing_segments.push(segment);
232 }
233
234 pub fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236 std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237 }
238
239 pub fn text(&self) -> Option<&str> {
241 self.single_segment().map(LexedWordSegment::as_str)
242 }
243
244 pub fn joined_text(&self) -> String {
246 let mut text = String::new();
247 for segment in self.segments() {
248 text.push_str(segment.as_str());
249 }
250 text
251 }
252
253 pub fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255 self.trailing_segments
256 .is_empty()
257 .then_some(&self.primary_segment)
258 }
259
260 fn has_cooked_text(&self) -> bool {
261 self.segments()
262 .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263 }
264
265 fn rebased(mut self, base: Position) -> Self {
266 self.primary_segment = self.primary_segment.rebased(base);
267 self.trailing_segments = self
268 .trailing_segments
269 .into_iter()
270 .map(|segment| segment.rebased(base))
271 .collect();
272 self
273 }
274
275 fn into_owned<'b>(self) -> LexedWord<'b> {
276 LexedWord {
277 primary_segment: self.primary_segment.into_owned(),
278 trailing_segments: self
279 .trailing_segments
280 .into_iter()
281 .map(LexedWordSegment::into_owned)
282 .collect(),
283 }
284 }
285
286 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287 LexedWord {
288 primary_segment: self.primary_segment.into_shared(source),
289 trailing_segments: self
290 .trailing_segments
291 .into_iter()
292 .map(|segment| segment.into_shared(source))
293 .collect(),
294 }
295 }
296}
297
/// Recoverable lexer errors attached to error tokens.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexerErrorKind {
    CommandSubstitution,
    BacktickSubstitution,
    SingleQuote,
    DoubleQuote,
}

impl LexerErrorKind {
    /// Human-readable description of the error.
    pub const fn message(self) -> &'static str {
        match self {
            Self::SingleQuote => "unterminated single quote",
            Self::DoubleQuote => "unterminated double quote",
            Self::CommandSubstitution => "unterminated command substitution",
            Self::BacktickSubstitution => "unterminated backtick substitution",
        }
    }
}
322
/// Kind-specific data carried by a `LexedToken`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// No extra data (punctuation, operators, comments).
    None,
    /// A word-like token's segmented text.
    Word(LexedWord<'a>),
    /// A single file-descriptor number.
    Fd(i32),
    /// A pair of file-descriptor numbers (source, destination).
    FdPair(i32, i32),
    /// A recoverable lexing error.
    Error(LexerErrorKind),
}
331
/// A single token produced by the lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedToken<'a> {
    /// Syntactic kind of the token.
    pub kind: TokenKind,
    /// Source span the token covers.
    pub span: Span,
    // Per-token attribute bits (cooked text, synthetic).
    pub(crate) flags: TokenFlags,
    // Kind-specific data.
    payload: TokenPayload<'a>,
}
342
impl<'a> LexedToken<'a> {
    /// Maps a word-producing token kind to the segment kind of its text.
    fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
        match kind {
            TokenKind::Word => LexedWordSegmentKind::Plain,
            TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
            TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
            _ => LexedWordSegmentKind::Composite,
        }
    }

    /// Data-free token (operators, delimiters); the span is stamped later.
    pub(crate) fn punctuation(kind: TokenKind) -> Self {
        Self {
            kind,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::None,
        }
    }

    /// Word token; sets the cooked-text flag when any segment owns its text.
    fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
        let flags = if word.has_cooked_text() {
            TokenFlags::cooked_text()
        } else {
            TokenFlags::empty()
        };

        Self {
            kind,
            span: Span::new(),
            flags,
            payload: TokenPayload::Word(word),
        }
    }

    /// Word token borrowing its text from the source.
    fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
        Self::with_word_payload(
            kind,
            LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
        )
    }

    /// Word token with owned (rewritten) text.
    fn owned_word(kind: TokenKind, text: String) -> Self {
        Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
    }

    /// Comment token with no payload.
    fn comment() -> Self {
        Self {
            kind: TokenKind::Comment,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::None,
        }
    }

    /// Token carrying a single file-descriptor number.
    fn fd(kind: TokenKind, fd: i32) -> Self {
        Self {
            kind,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::Fd(fd),
        }
    }

    /// Token carrying a source/destination file-descriptor pair.
    fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
        Self {
            kind,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::FdPair(src_fd, dst_fd),
        }
    }

    /// Error token wrapping a recoverable lexer error.
    fn error(kind: LexerErrorKind) -> Self {
        Self {
            kind: TokenKind::Error,
            span: Span::new(),
            flags: TokenFlags::empty(),
            payload: TokenPayload::Error(kind),
        }
    }

    /// Returns the token with its span replaced.
    pub(crate) fn with_span(mut self, span: Span) -> Self {
        self.span = span;
        self
    }

    /// Shifts the token's span, and any word payload spans, by `base`.
    pub(crate) fn rebased(mut self, base: Position) -> Self {
        self.span = self.span.rebased(base);
        self.payload = match self.payload {
            TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
            payload => payload,
        };
        self
    }

    /// Marks the token as synthesized by the lexer.
    pub(crate) fn with_synthetic_flag(mut self) -> Self {
        self.flags = self.flags.with_synthetic();
        self
    }

    /// Detaches the token from the input lifetime by copying borrowed text.
    pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
        // Non-word payloads carry no borrowed data but must still be rebuilt
        // to change the lifetime parameter.
        let payload = match self.payload {
            TokenPayload::None => TokenPayload::None,
            TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
            TokenPayload::Error(kind) => TokenPayload::Error(kind),
        };

        LexedToken {
            kind: self.kind,
            span: self.span,
            flags: self.flags,
            payload,
        }
    }

    /// Re-homes borrowed word text onto a shared `Arc` copy of the source.
    pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
        let payload = match self.payload {
            TokenPayload::None => TokenPayload::None,
            TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
            TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
            TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
            TokenPayload::Error(kind) => TokenPayload::Error(kind),
        };

        LexedToken {
            kind: self.kind,
            span: self.span,
            flags: self.flags,
            payload,
        }
    }

    /// Single-segment word text; `None` for non-word-like tokens or
    /// multi-segment words.
    pub fn word_text(&self) -> Option<&str> {
        self.kind
            .is_word_like()
            .then_some(())
            .and_then(|_| match &self.payload {
                TokenPayload::Word(word) => word.text(),
                _ => None,
            })
    }

    /// Joined text of all word segments, for word-like tokens only.
    pub fn word_string(&self) -> Option<String> {
        self.kind
            .is_word_like()
            .then_some(())
            .and_then(|_| match &self.payload {
                TokenPayload::Word(word) => Some(word.joined_text()),
                _ => None,
            })
    }

    /// The word payload, regardless of token kind.
    pub fn word(&self) -> Option<&LexedWord<'a>> {
        match &self.payload {
            TokenPayload::Word(word) => Some(word),
            _ => None,
        }
    }

    /// Raw source text covered by the token's span — only for word-like,
    /// non-cooked, non-synthetic tokens whose span lies within `source`.
    pub fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
        if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
            return None;
        }

        (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
            .then(|| &source[self.span.start.offset..self.span.end.offset])
    }

    /// Descriptor number for `Fd` payloads.
    pub fn fd_value(&self) -> Option<i32> {
        match self.payload {
            TokenPayload::Fd(fd) => Some(fd),
            _ => None,
        }
    }

    /// Descriptor pair for `FdPair` payloads.
    pub fn fd_pair_value(&self) -> Option<(i32, i32)> {
        match self.payload {
            TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
            _ => None,
        }
    }

    /// Error detail for `Error` payloads.
    pub fn error_kind(&self) -> Option<LexerErrorKind> {
        match self.payload {
            TokenPayload::Error(kind) => Some(kind),
            _ => None,
        }
    }
}
541
/// A here-document body collected after its operator was lexed.
#[derive(Debug, Clone, PartialEq)]
pub struct HeredocRead {
    /// The body text of the here-document.
    pub content: String,
    /// Span covering the body in the source.
    pub content_span: Span,
}
550
/// Default limit for nested substitutions (see `Lexer::max_subst_depth`).
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
554
555#[derive(Clone, Debug)]
556struct Cursor<'a> {
557 rest: &'a str,
558}
559
560impl<'a> Cursor<'a> {
561 fn new(source: &'a str) -> Self {
562 Self { rest: source }
563 }
564
565 fn first(&self) -> Option<char> {
566 self.rest.chars().next()
567 }
568
569 fn second(&self) -> Option<char> {
570 let mut chars = self.rest.chars();
571 chars.next()?;
572 chars.next()
573 }
574
575 fn third(&self) -> Option<char> {
576 let mut chars = self.rest.chars();
577 chars.next()?;
578 chars.next()?;
579 chars.next()
580 }
581
582 fn bump(&mut self) -> Option<char> {
583 let ch = self.first()?;
584 self.rest = &self.rest[ch.len_utf8()..];
585 Some(ch)
586 }
587
588 fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
589 let start = self.rest;
590 let mut end = 0;
591
592 for ch in start.chars() {
593 if !predicate(ch) {
594 break;
595 }
596 end += ch.len_utf8();
597 }
598
599 self.rest = &start[end..];
600 &start[..end]
601 }
602
603 fn rest(&self) -> &'a str {
604 self.rest
605 }
606
607 fn skip_bytes(&mut self, count: usize) {
608 self.rest = &self.rest[count..];
609 }
610
611 fn find_byte(&self, byte: u8) -> Option<usize> {
612 memchr(byte, self.rest.as_bytes())
613 }
614}
615
/// Byte-offset to line/column translator backed by a precomputed line-start
/// table plus a single-entry cache for forward queries.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    // The full source text being mapped.
    source: &'a str,
    // Byte offset of the start of each line; first entry is always 0.
    line_starts: Vec<usize>,
    // Most recently computed position, reused for forward queries.
    cached: Position,
}
622
/// Counters collected during lexing when the `benchmarking` feature is on.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of calls made to `Lexer::current_position`.
    pub(crate) current_position_calls: u64,
}
628
629impl<'a> PositionMap<'a> {
630 fn new(source: &'a str) -> Self {
631 let mut line_starts =
632 Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
633 line_starts.push(0);
634 line_starts.extend(
635 source
636 .bytes()
637 .enumerate()
638 .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
639 );
640
641 Self {
642 source,
643 line_starts,
644 cached: Position::new(),
645 }
646 }
647
648 fn position(&mut self, offset: usize) -> Position {
649 if offset == self.cached.offset {
650 return self.cached;
651 }
652
653 let position = if offset > self.cached.offset && offset <= self.source.len() {
654 Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
655 } else {
656 self.position_uncached(offset)
657 };
658 self.cached = position;
659 position
660 }
661
662 fn position_uncached(&self, offset: usize) -> Position {
663 let offset = offset.min(self.source.len());
664 let line_index = self
665 .line_starts
666 .partition_point(|start| *start <= offset)
667 .saturating_sub(1);
668 let line_start = self.line_starts[line_index];
669 let line_text = &self.source[line_start..offset];
670 let column = if line_text.is_ascii() {
671 line_text.len() + 1
672 } else {
673 line_text.chars().count() + 1
674 };
675
676 Position {
677 line: line_index + 1,
678 column,
679 offset,
680 }
681 }
682
683 fn advance_from(mut position: Position, text: &str) -> Position {
684 position.offset += text.len();
685 let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
686 if newline_count == 0 {
687 position.column += if text.is_ascii() {
688 text.len()
689 } else {
690 text.chars().count()
691 };
692 return position;
693 }
694
695 position.line += newline_count;
696 let tail_start = memrchr(b'\n', text.as_bytes())
697 .map(|index| index + 1)
698 .unwrap_or_default();
699 let tail = &text[tail_start..];
700 position.column = if tail.is_ascii() {
701 tail.len() + 1
702 } else {
703 tail.chars().count() + 1
704 };
705 position
706 }
707}
708
/// Streaming shell lexer over a borrowed input string.
#[derive(Clone)]
pub struct Lexer<'a> {
    // Full original input; methods slice it by span offsets.
    #[allow(dead_code)]
    input: &'a str,
    // Byte offset of the next character to lex.
    offset: usize,
    // Window over the not-yet-consumed input.
    cursor: Cursor<'a>,
    // Byte-offset -> line/column translator.
    position_map: PositionMap<'a>,
    // Characters pushed back for re-lexing ahead of the cursor.
    reinject_buf: VecDeque<char>,
    // Offset to restore once `reinject_buf` drains, if any.
    reinject_resume_offset: Option<usize>,
    // Limit for nested substitutions.
    max_subst_depth: usize,
    // Zsh option state at the start of input, when the profile provides one.
    initial_zsh_options: Option<ZshOptionState>,
    // Precomputed option-state changes keyed by source offset (zsh only).
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    // Index of the next timeline entry not yet applied.
    zsh_timeline_index: usize,
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
731
732impl<'a> Lexer<'a> {
    /// Lexer over `input` with the default substitution depth and a native
    /// Bash profile.
    pub fn new(input: &'a str) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            DEFAULT_MAX_SUBST_DEPTH,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }
742
    /// Lexer over `input` with a caller-chosen substitution depth limit and a
    /// native Bash profile.
    pub fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
        Self::with_max_subst_depth_and_profile(
            input,
            max_depth,
            &ShellProfile::native(super::ShellDialect::Bash),
            None,
        )
    }
753
754 pub fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
756 let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
757 .then(|| ZshOptionTimeline::build(input, shell_profile))
758 .flatten()
759 .map(Arc::new);
760 Self::with_max_subst_depth_and_profile(
761 input,
762 DEFAULT_MAX_SUBST_DEPTH,
763 shell_profile,
764 zsh_timeline,
765 )
766 }
767
    /// Fully-parameterized constructor shared by the public entry points.
    pub(crate) fn with_max_subst_depth_and_profile(
        input: &'a str,
        max_depth: usize,
        shell_profile: &ShellProfile,
        zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    ) -> Self {
        Self {
            input,
            offset: 0,
            cursor: Cursor::new(input),
            position_map: PositionMap::new(input),
            reinject_buf: VecDeque::new(),
            reinject_resume_offset: None,
            max_subst_depth: max_depth,
            initial_zsh_options: shell_profile.zsh_options().cloned(),
            zsh_timeline,
            zsh_timeline_index: 0,
            #[cfg(feature = "benchmarking")]
            benchmark_counters: None,
        }
    }
789
    /// Current position, computed without touching the cache (usable through
    /// a shared reference).
    pub fn position(&self) -> Position {
        self.position_map.position_uncached(self.offset)
    }

    /// Current position via the caching fast path; also counted when
    /// benchmarking is enabled.
    fn current_position(&mut self) -> Position {
        #[cfg(feature = "benchmarking")]
        self.maybe_record_current_position_call();
        self.position_map.position(self.offset)
    }
800
    /// Turns on collection of lexer benchmark counters.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn enable_benchmark_counters(&mut self) {
        self.benchmark_counters = Some(LexerBenchmarkCounters::default());
    }

    /// Snapshot of the counters; zeroed when collection was never enabled.
    #[cfg(feature = "benchmarking")]
    pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
        self.benchmark_counters.unwrap_or_default()
    }

    /// Bumps the `current_position` call counter when collection is enabled.
    #[cfg(feature = "benchmarking")]
    fn maybe_record_current_position_call(&mut self) {
        if let Some(counters) = &mut self.benchmark_counters {
            counters.current_position_calls += 1;
        }
    }
817
    /// Once the reinjection buffer drains, jumps `offset` back to where raw
    /// source scanning should resume.
    fn sync_offset_to_cursor(&mut self) {
        if self.reinject_buf.is_empty()
            && let Some(offset) = self.reinject_resume_offset.take()
        {
            self.offset = offset;
        }
    }
825
    /// Convenience wrapper returning only the next token's kind.
    pub fn next_token_kind(&mut self) -> Option<TokenKind> {
        self.next_lexed_token().map(|token| token.kind)
    }
831
832 fn peek_char(&mut self) -> Option<char> {
833 self.sync_offset_to_cursor();
834 if let Some(&ch) = self.reinject_buf.front() {
835 Some(ch)
836 } else {
837 self.cursor.first()
838 }
839 }
840
841 fn advance(&mut self) -> Option<char> {
842 self.sync_offset_to_cursor();
843 let ch = if !self.reinject_buf.is_empty() {
844 self.reinject_buf.pop_front()
845 } else {
846 self.cursor.bump()
847 };
848 if let Some(c) = ch {
849 self.offset += c.len_utf8();
850 }
851 ch
852 }
853
    /// Iterator over upcoming characters: pending reinjected characters
    /// first, then the unread source.
    fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
        self.reinject_buf
            .iter()
            .copied()
            .chain(self.cursor.rest().chars())
    }
860
    /// Second upcoming character. The match on the reinjection buffer length
    /// is a fast path: it answers directly from the buffer or the cursor
    /// without building the chained lookahead iterator.
    fn second_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.second(),
            1 => self.cursor.first(),
            _ => self.reinject_buf.get(1).copied(),
        }
    }

    /// Third upcoming character (same fast-path structure as `second_char`).
    fn third_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.third(),
            1 => self.cursor.second(),
            2 => self.cursor.first(),
            _ => self.reinject_buf.get(2).copied(),
        }
    }

    /// Fourth upcoming character (same fast-path structure as `second_char`).
    fn fourth_char(&self) -> Option<char> {
        match self.reinject_buf.len() {
            0 => self.cursor.rest().chars().nth(3),
            1 => self.cursor.third(),
            2 => self.cursor.second(),
            3 => self.cursor.first(),
            _ => self.reinject_buf.get(3).copied(),
        }
    }
887
    /// Advances both the offset and the cursor by `byte_len` raw bytes. Must
    /// not be called while reinjected characters are pending.
    fn consume_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.sync_offset_to_cursor();
        self.offset += byte_len;
        self.cursor.skip_bytes(byte_len);
    }

    /// Advances only the offset by `byte_len`.
    // NOTE(review): the cursor is not moved here — callers appear to advance
    // it separately after scanning; confirm at call sites.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }
899
    /// Consumes `count` characters. When no reinjected characters are
    /// pending this takes the raw-byte fast path, which is valid only for
    /// ASCII input (one char == one byte).
    fn consume_ascii_chars(&mut self, count: usize) {
        if self.reinject_buf.is_empty() {
            self.consume_source_bytes(count);
            return;
        }

        // Slow path: drain through the reinjection-aware interface.
        for _ in 0..count {
            self.advance();
        }
    }
910
911 fn source_horizontal_whitespace_len(&self) -> usize {
912 self.cursor
913 .rest()
914 .as_bytes()
915 .iter()
916 .take_while(|byte| matches!(**byte, b' ' | b'\t'))
917 .count()
918 }
919
920 fn source_ascii_plain_word_len(&self) -> usize {
921 self.cursor
922 .rest()
923 .as_bytes()
924 .iter()
925 .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
926 .count()
927 }
928
929 fn find_double_quote_special(source: &str) -> Option<usize> {
930 source
931 .as_bytes()
932 .iter()
933 .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
934 }
935
936 fn ensure_capture_from_source(
937 &self,
938 capture: &mut Option<String>,
939 start: Position,
940 end: Position,
941 ) {
942 if capture.is_none() {
943 *capture = Some(self.input[start.offset..end.offset].to_string());
944 }
945 }
946
947 fn push_capture_char(capture: &mut Option<String>, ch: char) {
948 if let Some(text) = capture.as_mut() {
949 text.push(ch);
950 }
951 }
952
953 fn push_capture_str(capture: &mut Option<String>, text: &str) {
954 if let Some(current) = capture.as_mut() {
955 current.push_str(text);
956 }
957 }
958
    /// Option state in effect at the current offset: replays timeline entries
    /// whose offset is at or before `self.offset` and returns the most recent
    /// state; falls back to the initial state when no entry applies or no
    /// timeline exists.
    fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
        if let Some(timeline) = self.zsh_timeline.as_ref() {
            // The index only moves forward; lexing proceeds monotonically.
            while self.zsh_timeline_index < timeline.entries.len()
                && timeline.entries[self.zsh_timeline_index].offset <= self.offset
            {
                self.zsh_timeline_index += 1;
            }
            return if self.zsh_timeline_index == 0 {
                self.initial_zsh_options.as_ref()
            } else {
                Some(&timeline.entries[self.zsh_timeline_index - 1].state)
            };
        }

        self.initial_zsh_options.as_ref()
    }
975
976 fn comments_enabled(&mut self) -> bool {
977 !self
978 .current_zsh_options()
979 .is_some_and(|options| options.interactive_comments.is_definitely_off())
980 }
981
982 fn rc_quotes_enabled(&mut self) -> bool {
983 self.current_zsh_options()
984 .is_some_and(|options| options.rc_quotes.is_definitely_on())
985 }
986
987 fn ignore_braces_enabled(&mut self) -> bool {
988 self.current_zsh_options()
989 .is_some_and(|options| options.ignore_braces.is_definitely_on())
990 }
991
992 fn ignore_close_braces_enabled(&mut self) -> bool {
993 self.current_zsh_options().is_some_and(|options| {
994 options.ignore_braces.is_definitely_on()
995 || options.ignore_close_braces.is_definitely_on()
996 })
997 }
998
    /// Decides whether `#` starts a word rather than a comment: always a word
    /// char when comments are disabled; otherwise only when it directly
    /// follows a non-delimiter character or sits inside an unclosed `((` on
    /// the current line.
    fn should_treat_hash_as_word_char(&mut self) -> bool {
        if !self.comments_enabled() {
            return true;
        }
        self.reinject_buf.is_empty()
            && (self
                .input
                .get(..self.offset)
                .and_then(|prefix| prefix.chars().next_back())
                .is_some_and(|prev| {
                    !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
                })
                || self.is_inside_unclosed_double_paren_on_line())
    }
1013
1014 fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1015 capture
1016 .as_deref()
1017 .unwrap_or(&self.input[start.offset..self.offset])
1018 }
1019
    /// True when the word's surface text is exactly the single character
    /// `target`, ignoring embedded NUL characters.
    fn current_word_surface_is_single_char(
        &self,
        start: Position,
        capture: &Option<String>,
        target: char,
    ) -> bool {
        let text = self.current_word_text(start, capture);
        if !text.contains('\x00') {
            // Fast path: no NULs, compare the raw text against the encoded char.
            let mut encoded = [0; 4];
            return text == target.encode_utf8(&mut encoded);
        }

        // Slow path: skip NULs and require exactly one remaining char.
        let mut chars = text.chars().filter(|&ch| ch != '\x00');
        matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
    }
1035
1036 fn current_word_surface_last_char<'b>(
1037 &'b self,
1038 start: Position,
1039 capture: &'b Option<String>,
1040 ) -> Option<char> {
1041 self.current_word_text(start, capture)
1042 .chars()
1043 .rev()
1044 .find(|&ch| ch != '\x00')
1045 }
1046
1047 fn current_word_surface_ends_with_char(
1048 &self,
1049 start: Position,
1050 capture: &Option<String>,
1051 target: char,
1052 ) -> bool {
1053 self.current_word_surface_last_char(start, capture) == Some(target)
1054 }
1055
1056 fn current_word_surface_ends_with_extglob_prefix(
1057 &self,
1058 start: Position,
1059 capture: &Option<String>,
1060 ) -> bool {
1061 self.current_word_surface_last_char(start, capture)
1062 .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1063 }
1064
    /// Lexes the next token (skipping whitespace and comments) and stamps it
    /// with the span it covered.
    pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
        self.skip_whitespace();
        let start = self.current_position();
        let token = self.next_lexed_token_inner(false)?;
        let end = self.current_position();
        Some(token.with_span(Span::from_positions(start, end)))
    }

    /// Like `next_lexed_token`, but yields `Comment` tokens instead of
    /// silently skipping comments.
    pub fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
        self.skip_whitespace();
        let start = self.current_position();
        let token = self.next_lexed_token_inner(true)?;
        let end = self.current_position();
        Some(token.with_span(Span::from_positions(start, end)))
    }
1082
    /// Core tokenizer: dispatches on the next character and consumes exactly
    /// one token's worth of input. Span stamping happens in the callers.
    /// `preserve_comments` controls whether `#` comments become `Comment`
    /// tokens or are skipped. Returns `None` at end of input.
    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
        let ch = self.peek_char()?;

        match ch {
            '\n' => {
                self.consume_ascii_chars(1);
                Some(LexedToken::punctuation(TokenKind::Newline))
            }
            // ';' family: ";;&", ";;", ";|", ";&", ";".
            ';' => {
                if self.second_char() == Some(';') {
                    if self.third_char() == Some('&') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiPipe))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiAmp))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Semicolon))
                }
            }
            // '|' family: "||", "|&", "|".
            '|' => {
                if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Or))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Pipe))
                }
            }
            // '&' family: "&&", "&>>", "&>", "&|", "&!", "&".
            '&' => {
                if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::And))
                } else if self.second_char() == Some('>') {
                    if self.third_char() == Some('>') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
                } else if self.second_char() == Some('!') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Background))
                }
            }
            // '>' family: ">>|"/">>", ">|", ">(", ">&", ">".
            '>' => {
                if self.second_char() == Some('>') {
                    // ">>|" consumes the '|' as well but is still lexed as a
                    // plain append redirect.
                    if self.third_char() == Some('|') {
                        self.consume_ascii_chars(3);
                    } else {
                        self.consume_ascii_chars(2);
                    }
                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Clobber))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupOutput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
                }
            }
            // '<' family: "<<<", "<<-", "<<", "<>", "<(", "<&", "<".
            '<' => {
                if self.second_char() == Some('<') {
                    if self.third_char() == Some('<') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereString))
                    } else if self.third_char() == Some('-') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::HereDoc))
                    }
                } else if self.second_char() == Some('>') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupInput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
                }
            }
            '(' => {
                if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::LeftParen))
                }
            }
            ')' => {
                if self.second_char() == Some(')') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RightParen))
                }
            }
            // '{' is context sensitive: with zsh ignore_braces it acts as a
            // word character; otherwise it may open a brace expansion, a
            // brace group, or just start a literal word.
            '{' => {
                if self.ignore_braces_enabled() {
                    let start = self.current_position();
                    self.consume_ascii_chars(1);
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
                        }
                        _ => self.read_word_starting_with("{", start),
                    }
                } else if self.looks_like_brace_expansion() {
                    self.read_brace_expansion_word()
                } else if self.is_brace_group_start() {
                    self.advance();
                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
                } else {
                    self.read_brace_literal_word()
                }
            }
            '}' => {
                self.consume_ascii_chars(1);
                if self.ignore_close_braces_enabled() {
                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
                } else {
                    Some(LexedToken::punctuation(TokenKind::RightBrace))
                }
            }
            // "[[" is a token only when followed by a delimiter; otherwise
            // '[' begins a word.
            '[' => {
                let start = self.current_position();
                self.consume_ascii_chars(1);
                if self.peek_char() == Some('[')
                    && matches!(
                        self.second_char(),
                        Some(' ') | Some('\t') | Some('\n') | None
                    )
                {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
                } else {
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
                        }
                        _ => self.read_word_starting_with("[", start),
                    }
                }
            }
            ']' => {
                if self.second_char() == Some(']') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
                }
            }
            '\'' => self.read_single_quoted_string(),
            '"' => self.read_double_quoted_string(),
            // '#': either part of a word, a preserved Comment token, or
            // skipped entirely (then lex the following token).
            '#' => {
                if self.should_treat_hash_as_word_char() {
                    let start = self.current_position();
                    return self.read_word_starting_with("#", start);
                }
                if preserve_comments {
                    self.read_comment();
                    Some(LexedToken::comment())
                } else {
                    self.skip_comment();
                    self.next_lexed_token_inner(false)
                }
            }
            // Digits may be an fd prefix of a redirect (e.g. "2>").
            '0'..='9' => self.read_word_or_fd_redirect(),
            _ => self.read_word(),
        }
    }
1299
1300 fn skip_whitespace(&mut self) {
1301 while let Some(ch) = self.peek_char() {
1302 if self.reinject_buf.is_empty() {
1303 let whitespace_len = self.source_horizontal_whitespace_len();
1304 if whitespace_len > 0 {
1305 self.consume_source_bytes(whitespace_len);
1306 continue;
1307 }
1308
1309 if self.cursor.rest().starts_with("\\\n") {
1310 self.consume_source_bytes(2);
1311 continue;
1312 }
1313 }
1314
1315 if ch == ' ' || ch == '\t' {
1316 self.consume_ascii_chars(1);
1317 } else if ch == '\\' {
1318 if self.second_char() == Some('\n') {
1320 self.consume_ascii_chars(2);
1321 } else {
1322 break;
1323 }
1324 } else {
1325 break;
1326 }
1327 }
1328 }
1329
1330 fn skip_comment(&mut self) {
1331 if self.reinject_buf.is_empty() {
1332 let end = self
1333 .cursor
1334 .find_byte(b'\n')
1335 .unwrap_or(self.cursor.rest().len());
1336 self.consume_source_bytes(end);
1337 return;
1338 }
1339
1340 while let Some(ch) = self.peek_char() {
1341 if ch == '\n' {
1342 break;
1343 }
1344 self.advance();
1345 }
1346 }
1347
1348 fn read_comment(&mut self) {
1349 debug_assert_eq!(self.peek_char(), Some('#'));
1350
1351 if self.reinject_buf.is_empty() {
1352 let rest = self.cursor.rest();
1353 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1354 self.consume_source_bytes(end);
1355 return;
1356 }
1357
1358 self.advance(); while let Some(ch) = self.peek_char() {
1361 if ch == '\n' {
1362 break;
1363 }
1364 self.advance();
1365 }
1366 }
1367
1368 fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1369 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1370 return false;
1371 }
1372
1373 let line_start = self.input[..self.offset]
1374 .rfind('\n')
1375 .map_or(0, |index| index + 1);
1376 let prefix = &self.input[line_start..self.offset];
1377 line_has_unclosed_double_paren(prefix)
1378 }
1379
    /// Lexes a token that begins with an ASCII digit: either an fd-prefixed
    /// redirection (`2>`, `2>>`, `2>&1`, `0<&3`, `3<>`, `2>|`, ...) or an
    /// ordinary word that happens to start with digits.
    ///
    /// Only a single leading digit is considered as the file descriptor; the
    /// decision is made from the two (or three) characters that follow it.
    /// Anything that does not form a redirect operator falls through to
    /// [`Self::read_word`].
    fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
        if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
            let Some(fd) = first_digit.to_digit(10) else {
                unreachable!("peeked ASCII digit should convert to a base-10 digit");
            };
            // The single leading digit is the candidate descriptor.
            let fd = fd as i32;

            match (self.second_char(), self.third_char()) {
                (Some('>'), Some('>')) => {
                    // `N>>` append; `N>>|` consumes the trailing `|` but is
                    // reported with the same kind.
                    if self.fourth_char() == Some('|') {
                        self.consume_ascii_chars(4);
                    } else {
                        self.consume_ascii_chars(3);
                    }
                    return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
                }
                (Some('>'), Some('|')) => {
                    // `N>|`: clobber redirect.
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::Clobber, fd));
                }
                (Some('>'), Some('&')) => {
                    // `N>&M`: duplicate output descriptor N onto M.
                    self.consume_ascii_chars(3);

                    // Collect the digits of the target descriptor, if any.
                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() {
                            target_str.push(c);
                            self.advance();
                        } else {
                            break;
                        }
                    }

                    if target_str.is_empty() {
                        // Bare `N>&` (e.g. `2>& file`): fall back to a plain
                        // fd output redirect.
                        return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                    }

                    // NOTE(review): a target that fails to parse as i32
                    // (e.g. overflow) silently becomes 1 — confirm intended.
                    let target_fd: i32 = target_str.parse().unwrap_or(1);
                    return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
                }
                (Some('>'), _) => {
                    // `N>`: plain output redirect from descriptor N.
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
                }
                (Some('<'), Some('&')) => {
                    // `N<&M` duplicates input fd M onto N; `N<&-` closes N.
                    self.consume_ascii_chars(3);

                    let mut target_str = String::with_capacity(4);
                    while let Some(c) = self.peek_char() {
                        if c.is_ascii_digit() || c == '-' {
                            target_str.push(c);
                            self.advance();
                            // A `-` ends the target (`N<&-` / `N<&M-`).
                            if c == '-' {
                                break;
                            }
                        } else {
                            break;
                        }
                    }

                    if target_str == "-" {
                        return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
                    }
                    // NOTE(review): a form like `3<&4-` yields "4-", which
                    // fails to parse and falls back to target fd 0; confirm
                    // the move-fd form is meant to be handled elsewhere.
                    let target_fd: i32 = target_str.parse().unwrap_or(0);
                    return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
                }
                (Some('<'), Some('>')) => {
                    // `N<>`: open descriptor N for reading and writing.
                    self.consume_ascii_chars(3);
                    return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
                }
                // `N<<` (fd-prefixed here-doc) is deliberately not consumed
                // here; it falls through to the word path below.
                (Some('<'), Some('<')) => {}
                (Some('<'), _) => {
                    // `N<`: plain input redirect into descriptor N.
                    self.consume_ascii_chars(2);
                    return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
                }
                _ => {}
            }
        }

        // Not a redirect operator: lex as an ordinary word.
        self.read_word()
    }
1464
1465 fn read_word_starting_with(
1466 &mut self,
1467 _prefix: &str,
1468 start: Position,
1469 ) -> Option<LexedToken<'a>> {
1470 let segment = match self.read_unquoted_segment(start) {
1471 Ok(segment) => segment,
1472 Err(kind) => return Some(LexedToken::error(kind)),
1473 };
1474 if segment.as_str().is_empty() {
1475 return None;
1476 }
1477 let mut lexed_word = LexedWord::from_segment(segment);
1478 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1479 return Some(LexedToken::error(kind));
1480 }
1481 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1482 }
1483
    /// Lexes a word starting at the current position.
    ///
    /// Tries a zero-copy fast path first: when no reinjected characters are
    /// pending, a run of plain word bytes is sliced straight out of the
    /// source. Falls back to [`Self::read_complex_word`] when the word
    /// involves quoting, expansions, or other structure.
    fn read_word(&mut self) -> Option<LexedToken<'a>> {
        let start = self.current_position();

        if self.reinject_buf.is_empty() {
            // Measure the leading run of plain ASCII word bytes.
            let ascii_len = self.source_ascii_plain_word_len();
            let chunk = if ascii_len > 0
                && self
                    .cursor
                    .rest()
                    .as_bytes()
                    .get(ascii_len)
                    .is_none_or(|byte| byte.is_ascii())
            {
                // Run ends at an ASCII boundary (or end of input), so the
                // byte count equals the char count: consume it wholesale.
                self.consume_source_bytes(ascii_len);
                &self.input[start.offset..self.offset]
            } else {
                // Non-ASCII follows: scan char-by-char instead.
                let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                self.advance_scanned_source_bytes(chunk.len());
                chunk
            };
            if !chunk.is_empty() {
                // Does the word continue past the plain run? Another word
                // char, an expansion (`$`), a quote, a brace, or `(` after
                // `=`/a suffix-taking word all extend it.
                let continues = matches!(
                    self.peek_char(),
                    Some(next)
                        if Self::is_word_char(next)
                            || next == '$'
                            || matches!(next, '\'' | '"')
                            || next == '{'
                            || (next == '('
                                && (chunk.ends_with('=')
                                    || Self::word_can_take_parenthesized_suffix(chunk)))
                );

                if !continues {
                    // Entire word was plain: emit a borrowed token.
                    let end = self.current_position();
                    return Some(LexedToken::borrowed_word(
                        TokenKind::Word,
                        &self.input[start.offset..self.offset],
                        Some(Span::from_positions(start, end)),
                    ));
                }

                if self.peek_char() == Some('(')
                    && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
                {
                    // e.g. `arr=(...)`: restart as a complex word so the
                    // parenthesized part is captured with its prefix.
                    return self.read_complex_word(start);
                }

                // Plain prefix followed by quotes/expansions: keep the
                // borrowed prefix as the first segment and append the rest.
                let end = self.current_position();
                return self.finish_segmented_word(LexedWord::borrowed(
                    LexedWordSegmentKind::Plain,
                    &self.input[start.offset..self.offset],
                    Some(Span::from_positions(start, end)),
                ));
            }
        }

        self.read_complex_word(start)
    }
1543
1544 fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1545 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1546 return Some(LexedToken::error(kind));
1547 }
1548
1549 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1550 }
1551
1552 fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1553 if self.peek_char() == Some('$') {
1554 match self.second_char() {
1555 Some('\'') => return self.read_dollar_single_quoted_string(),
1556 Some('"') => return self.read_dollar_double_quoted_string(),
1557 _ => {}
1558 }
1559 }
1560
1561 let segment = match self.read_unquoted_segment(start) {
1562 Ok(segment) => segment,
1563 Err(kind) => return Some(LexedToken::error(kind)),
1564 };
1565
1566 if segment.as_str().is_empty() {
1567 return None;
1568 }
1569
1570 self.finish_segmented_word(LexedWord::from_segment(segment))
1571 }
1572
    /// Reads one unquoted word segment starting at `start`.
    ///
    /// Stops at a quote boundary (`'`, `"`, or `$'`/`$"` once something has
    /// already been captured) or at any character that cannot be part of a
    /// plain word. Expansions (`$var`, `${...}`, `$(...)`, `$((...))`,
    /// `$[...]`), backtick substitutions, backslash escapes, mid-word brace
    /// segments, extglob groups, and `name=(...)` values are captured into
    /// the segment text.
    ///
    /// Capture strategy: while reading straight from the source (no
    /// reinjected characters) `word` stays `None` and the final segment
    /// borrows `self.input[start..offset]`; the first construct that needs
    /// rewriting switches to an owned buffer via `ensure_capture_from_source`
    /// and `push_capture_char` (a no-op while `word` is `None`).
    ///
    /// # Errors
    /// `LexerErrorKind::CommandSubstitution` for an unterminated
    /// substitution/arithmetic form, `LexerErrorKind::BacktickSubstitution`
    /// for an unterminated backtick.
    fn read_unquoted_segment(
        &mut self,
        start: Position,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        // Owned capture buffer; allocated only when characters come from the
        // reinjection buffer and therefore cannot be borrowed from input.
        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        while let Some(ch) = self.peek_char() {
            if ch == '"' || ch == '\'' {
                // A quote starts a new segment; this one ends here.
                break;
            } else if ch == '$' {
                if matches!(self.second_char(), Some('\'') | Some('"'))
                    && (self.current_position().offset > start.offset
                        || word.as_ref().is_some_and(|word| !word.is_empty()))
                {
                    // `$'...'`/`$"..."` after captured text: leave it for the
                    // dedicated quoted-segment reader.
                    break;
                }

                self.advance();

                Self::push_capture_char(&mut word, ch);
                if self.peek_char() == Some('[') {
                    // Legacy arithmetic `$[...]`.
                    Self::push_capture_char(&mut word, '[');
                    self.advance();
                    if !self.read_legacy_arithmetic_into(&mut word, start) {
                        return Err(LexerErrorKind::CommandSubstitution);
                    }
                } else if self.peek_char() == Some('(') {
                    if self.second_char() == Some('(') {
                        // Arithmetic expansion `$((...))`.
                        if !self.read_arithmetic_expansion_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    } else {
                        // Command substitution `$(...)`.
                        Self::push_capture_char(&mut word, '(');
                        self.advance();
                        if !self.read_command_subst_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    }
                } else if self.peek_char() == Some('{') {
                    // Parameter expansion `${...}`; a malformed expansion is
                    // tolerated here and the captured text kept as-is.
                    Self::push_capture_char(&mut word, '{');
                    self.advance();
                    let _ = self.read_param_expansion_into(&mut word, start);
                } else {
                    if let Some(c) = self.peek_char() {
                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
                            || c.is_ascii_digit()
                        {
                            // Special one-character parameter ($?, $#, $1, ...).
                            Self::push_capture_char(&mut word, c);
                            self.advance();
                        } else {
                            // Plain variable name: [A-Za-z0-9_]+.
                            while let Some(c) = self.peek_char() {
                                if c.is_ascii_alphanumeric() || c == '_' {
                                    Self::push_capture_char(&mut word, c);
                                    self.advance();
                                } else {
                                    break;
                                }
                            }
                        }
                    }
                }
            } else if ch == '{' {
                if self.looks_like_mid_word_brace_segment() {
                    // Brace-expansion text mid-word: capture the balanced
                    // `{...}` run, tracking nesting depth.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                    let mut depth = 1;
                    while let Some(c) = self.peek_char() {
                        Self::push_capture_char(&mut word, c);
                        self.advance();
                        if c == '{' {
                            depth += 1;
                        } else if c == '}' {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                    }
                } else {
                    // Literal `{` inside a word.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else if ch == '`' {
                // Backtick substitution: needs an owned buffer from here on.
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut closed = false;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    if c == '`' {
                        closed = true;
                        break;
                    }
                    if c == '\\'
                        && let Some(next) = self.peek_char()
                    {
                        // Keep the escaped char so an escaped backtick does
                        // not terminate the substitution.
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                    }
                }
                if !closed {
                    return Err(LexerErrorKind::BacktickSubstitution);
                }
            } else if ch == '\\' {
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                self.advance();
                if let Some(next) = self.peek_char() {
                    if next == '\n' {
                        // Line continuation: both characters are dropped.
                        self.advance();
                    } else {
                        // A NUL marker byte flags the following char as
                        // escaped for later word processing.
                        Self::push_capture_char(&mut word, '\x00');
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                        if next == '{'
                            && self.current_word_surface_is_single_char(start, &word, '{')
                            && self.escaped_brace_sequence_looks_like_brace_expansion()
                        {
                            // `\{a,b\}`-style escaped brace expansion:
                            // capture through the matching close brace.
                            let mut depth = 1;
                            while let Some(c) = self.peek_char() {
                                Self::push_capture_char(&mut word, c);
                                self.advance();
                                match c {
                                    '{' => depth += 1,
                                    '}' => {
                                        depth -= 1;
                                        if depth == 0 {
                                            break;
                                        }
                                    }
                                    _ => {}
                                }
                            }
                        }
                    }
                } else {
                    // Trailing backslash at end of input: keep it literally.
                    Self::push_capture_char(&mut word, '\\');
                }
            } else if ch == '('
                && self.current_word_surface_ends_with_char(start, &word, '=')
                && self.looks_like_assoc_assign()
            {
                // `name=(...)` array/assoc assignment: capture the whole
                // parenthesized value, honoring nesting, quotes and escapes.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '"' => {
                            // Double-quoted run: parens inside do not count
                            // toward the depth; `\x` pairs are kept whole.
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '"' {
                                    break;
                                }
                                if qc == '\\'
                                    && let Some(esc) = self.peek_char()
                                {
                                    Self::push_capture_char(&mut word, esc);
                                    self.advance();
                                }
                            }
                        }
                        '\'' => {
                            // Single-quoted run: no escapes inside.
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '\'' {
                                    break;
                                }
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
            {
                // Extended glob group (e.g. `@(...)`): capture the balanced
                // parenthesized body.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if Self::is_plain_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Bulk-consume a plain run straight from the source.
                    let ascii_len = self.source_ascii_plain_word_len();
                    let chunk = if ascii_len > 0
                        && self
                            .cursor
                            .rest()
                            .as_bytes()
                            .get(ascii_len)
                            .is_none_or(|byte| byte.is_ascii())
                    {
                        self.consume_source_bytes(ascii_len);
                        &self.input[self.offset - ascii_len..self.offset]
                    } else {
                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                        self.advance_scanned_source_bytes(chunk.len());
                        chunk
                    };
                    Self::push_capture_str(&mut word, chunk);
                } else {
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else {
                // Metacharacter: the segment ends before it.
                break;
            }
        }

        if let Some(word) = word {
            // Owned capture was needed at some point.
            let span = Some(Span::from_positions(start, self.current_position()));
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::Plain,
                word,
                span,
                span,
            ))
        } else {
            // Never needed rewriting: borrow directly from the input.
            let end = self.current_position();
            Ok(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ))
        }
    }
1855
1856 fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1857 let segment = match self.read_single_quoted_segment() {
1858 Ok(segment) => segment,
1859 Err(kind) => return Some(LexedToken::error(kind)),
1860 };
1861 let mut word = LexedWord::from_segment(segment);
1862 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1863 return Some(LexedToken::error(kind));
1864 }
1865
1866 Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1867 }
1868
    /// Reads a `'...'` segment; the cursor must be on the opening quote.
    ///
    /// Fast path: with no reinjected characters and `RC_QUOTES` disabled,
    /// the closing quote is located with `memchr` and the content is
    /// borrowed from the input. Otherwise characters are copied one at a
    /// time; with `RC_QUOTES` enabled, a doubled `''` inside the string
    /// yields a literal single quote.
    ///
    /// # Errors
    /// `LexerErrorKind::SingleQuote` when the closing quote is missing.
    fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(1);
        let content_start = self.current_position();
        // Borrowing is possible only when reading pure source text and no
        // RC_QUOTES rewriting can occur.
        let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
        let mut content_end = content_start;
        let mut content = String::with_capacity(16);
        let mut closed = false;

        if can_borrow {
            let rest = self.cursor.rest();
            if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
                self.consume_source_bytes(quote_index);
                content_end = self.current_position();
                self.consume_ascii_chars(1);
                closed = true;
            } else {
                // No closing quote anywhere: consume the remainder so the
                // unterminated-quote error below fires.
                self.consume_source_bytes(rest.len());
            }
        }

        // Slow path (copying); on the fast path this loop exits immediately.
        while let Some(ch) = self.peek_char() {
            if closed {
                break;
            }
            if ch == '\'' {
                if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
                    // RC_QUOTES: `''` inside the string is a literal quote.
                    if !can_borrow {
                        content.push('\'');
                    }
                    self.advance();
                    self.advance();
                    continue;
                }
                content_end = self.current_position();
                self.consume_ascii_chars(1);
                closed = true;
                break;
            }
            if !can_borrow {
                content.push(ch);
            }
            self.advance();
        }

        if !closed {
            return Err(LexerErrorKind::SingleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if can_borrow {
            Ok(LexedWordSegment::borrowed_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::SingleQuoted,
                content,
                content_span,
                wrapper_span,
            ))
        }
    }
1939
1940 fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1941 let segment = match self.read_dollar_single_quoted_segment() {
1942 Ok(segment) => segment,
1943 Err(kind) => return Some(LexedToken::error(kind)),
1944 };
1945 let mut word = LexedWord::from_segment(segment);
1946 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1947 return Some(LexedToken::error(kind));
1948 }
1949
1950 let kind = if word.single_segment().is_some() {
1951 TokenKind::LiteralWord
1952 } else {
1953 TokenKind::Word
1954 };
1955
1956 Some(LexedToken::with_word_payload(kind, word))
1957 }
1958
    /// Reads a `$'...'` (ANSI-C quoting) segment, decoding backslash
    /// escapes into the owned result text. The cursor must be on the `$`.
    ///
    /// Supported escapes: C escapes (`\n`, `\t`, `\r`, `\a`, `\b`, `\f`,
    /// `\v`), `\e`/`\E` (ESC), `\\`, `\'`, `\"`, `\?`, `\cX` (control),
    /// `\xHH`, `\uHHHH`, `\UHHHHHHHH`, and 1–3 digit octal. Unknown escapes
    /// are kept as `\` plus the character.
    ///
    /// # Errors
    /// `LexerErrorKind::SingleQuote` when the closing quote is missing.
    fn read_dollar_single_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        debug_assert_eq!(self.peek_char(), Some('$'));
        debug_assert_eq!(self.second_char(), Some('\''));

        let wrapper_start = self.current_position();
        self.consume_ascii_chars(2);
        let content_start = self.current_position();
        let mut out = String::with_capacity(16);

        while let Some(ch) = self.peek_char() {
            if ch == '\'' {
                // Closing quote: the segment is complete.
                let content_end = self.current_position();
                self.advance();
                let wrapper_span =
                    Some(Span::from_positions(wrapper_start, self.current_position()));
                let content_span = Some(Span::from_positions(content_start, content_end));
                return Ok(LexedWordSegment::owned_with_spans(
                    LexedWordSegmentKind::DollarSingleQuoted,
                    out,
                    content_span,
                    wrapper_span,
                ));
            }

            if ch == '\\' {
                self.advance();
                if let Some(esc) = self.peek_char() {
                    self.advance();
                    match esc {
                        'n' => out.push('\n'),
                        't' => out.push('\t'),
                        'r' => out.push('\r'),
                        'a' => out.push('\x07'),
                        'b' => out.push('\x08'),
                        'f' => out.push('\x0C'),
                        'v' => out.push('\x0B'),
                        'e' | 'E' => out.push('\x1B'),
                        '\\' => out.push('\\'),
                        '\'' => out.push('\''),
                        '"' => out.push('"'),
                        '?' => out.push('?'),
                        'c' => {
                            // `\cX`: the control character X & 0x1F.
                            if let Some(control) = self.peek_char() {
                                self.advance();
                                out.push(((control as u32 & 0x1F) as u8) as char);
                            } else {
                                // `\c` at end of input: keep it literally.
                                out.push('\\');
                                out.push('c');
                            }
                        }
                        'x' => {
                            // `\xHH`: up to two hex digits.
                            let mut hex = String::new();
                            for _ in 0..2 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            // NOTE(review): with zero hex digits the parse
                            // fails and the `\x` is silently dropped —
                            // confirm this matches the intended shell
                            // behavior. The byte is pushed as a Unicode
                            // scalar, not a raw byte.
                            if let Ok(val) = u8::from_str_radix(&hex, 16) {
                                out.push(val as char);
                            }
                        }
                        'u' => {
                            // `\uHHHH`: up to four hex digits.
                            let mut hex = String::new();
                            for _ in 0..4 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            // Invalid scalar values are silently dropped.
                            if let Ok(val) = u32::from_str_radix(&hex, 16)
                                && let Some(c) = char::from_u32(val)
                            {
                                out.push(c);
                            }
                        }
                        'U' => {
                            // `\UHHHHHHHH`: up to eight hex digits.
                            let mut hex = String::new();
                            for _ in 0..8 {
                                if let Some(h) = self.peek_char() {
                                    if h.is_ascii_hexdigit() {
                                        hex.push(h);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            if let Ok(val) = u32::from_str_radix(&hex, 16)
                                && let Some(c) = char::from_u32(val)
                            {
                                out.push(c);
                            }
                        }
                        '0'..='7' => {
                            // Octal: the digit just read plus up to two more.
                            let mut oct = String::new();
                            oct.push(esc);
                            for _ in 0..2 {
                                if let Some(o) = self.peek_char() {
                                    if o.is_ascii_digit() && o < '8' {
                                        oct.push(o);
                                        self.advance();
                                    } else {
                                        break;
                                    }
                                }
                            }
                            // NOTE(review): values above 0o377 overflow u8
                            // and the escape is silently dropped.
                            if let Ok(val) = u8::from_str_radix(&oct, 8) {
                                out.push(val as char);
                            }
                        }
                        _ => {
                            // Unknown escape: keep backslash + char.
                            out.push('\\');
                            out.push(esc);
                        }
                    }
                } else {
                    // Trailing backslash at end of input.
                    out.push('\\');
                }
                continue;
            }

            out.push(ch);
            self.advance();
        }

        // Input ended before the closing quote.
        Err(LexerErrorKind::SingleQuote)
    }
2097
2098 fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2099 let start = self.current_position();
2100
2101 if self.reinject_buf.is_empty() {
2102 let ascii_len = self.source_ascii_plain_word_len();
2103 let chunk = if ascii_len > 0
2104 && self
2105 .cursor
2106 .rest()
2107 .as_bytes()
2108 .get(ascii_len)
2109 .is_none_or(|byte| byte.is_ascii())
2110 {
2111 self.consume_source_bytes(ascii_len);
2112 &self.input[start.offset..self.offset]
2113 } else {
2114 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2115 self.advance_scanned_source_bytes(chunk.len());
2116 chunk
2117 };
2118 if chunk.is_empty() {
2119 return None;
2120 }
2121
2122 let end = self.current_position();
2123 return Some(LexedWordSegment::borrowed(
2124 LexedWordSegmentKind::Plain,
2125 &self.input[start.offset..self.offset],
2126 Some(Span::from_positions(start, end)),
2127 ));
2128 }
2129
2130 let ch = self.peek_char()?;
2131 if !Self::is_plain_word_char(ch) {
2132 return None;
2133 }
2134
2135 let mut text = String::with_capacity(16);
2136 while let Some(ch) = self.peek_char() {
2137 if !Self::is_plain_word_char(ch) {
2138 break;
2139 }
2140 text.push(ch);
2141 self.advance();
2142 }
2143
2144 Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2145 }
2146
2147 fn append_segmented_continuation(
2150 &mut self,
2151 word: &mut LexedWord<'a>,
2152 ) -> Result<(), LexerErrorKind> {
2153 loop {
2154 match self.peek_char() {
2155 Some('\'') => {
2156 word.push_segment(self.read_single_quoted_segment()?);
2157 }
2158 Some('"') => {
2159 word.push_segment(self.read_double_quoted_segment()?);
2160 }
2161 Some('$') if self.second_char() == Some('\'') => {
2162 word.push_segment(self.read_dollar_single_quoted_segment()?);
2163 }
2164 Some('$') if self.second_char() == Some('"') => {
2165 word.push_segment(self.read_dollar_double_quoted_segment()?);
2166 }
2167 Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2168 let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2169 unreachable!("peeked '(' should produce a suffix segment");
2170 };
2171 word.push_segment(segment);
2172 }
2173 _ => {
2174 if let Some(segment) = self.read_plain_continuation_segment() {
2175 word.push_segment(segment);
2176 continue;
2177 }
2178
2179 let start = self.current_position();
2180 let plain = self.read_unquoted_segment(start)?;
2181 if plain.as_str().is_empty() {
2182 break;
2183 }
2184 word.push_segment(plain);
2185 }
2186 }
2187 }
2188
2189 Ok(())
2190 }
2191
2192 fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2193 debug_assert_eq!(self.peek_char(), Some('('));
2194
2195 let start = self.current_position();
2196 let mut depth = 0usize;
2197 let mut escaped = false;
2198 let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2199
2200 while let Some(ch) = self.peek_char() {
2201 if let Some(text) = text.as_mut() {
2202 text.push(ch);
2203 }
2204 self.advance();
2205
2206 if escaped {
2207 escaped = false;
2208 continue;
2209 }
2210
2211 match ch {
2212 '\\' => escaped = true,
2213 '(' => depth += 1,
2214 ')' => {
2215 depth = depth.saturating_sub(1);
2216 if depth == 0 {
2217 break;
2218 }
2219 }
2220 _ => {}
2221 }
2222 }
2223
2224 let end = self.current_position();
2225 let span = Some(Span::from_positions(start, end));
2226 if let Some(text) = text {
2227 Some(LexedWordSegment::owned_with_spans(
2228 LexedWordSegmentKind::Plain,
2229 text,
2230 span,
2231 span,
2232 ))
2233 } else {
2234 Some(LexedWordSegment::borrowed_with_spans(
2235 LexedWordSegmentKind::Plain,
2236 &self.input[start.offset..end.offset],
2237 span,
2238 span,
2239 ))
2240 }
2241 }
2242
    /// Lexes a word beginning with a `"..."` segment.
    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(false)
    }
2246
    /// Lexes a word beginning with a `$"..."` segment.
    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(true)
    }
2250
2251 fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2252 let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2253 Ok(segment) => segment,
2254 Err(kind) => return Some(LexedToken::error(kind)),
2255 };
2256 let mut word = LexedWord::from_segment(segment);
2257 if let Err(kind) = self.append_segmented_continuation(&mut word) {
2258 return Some(LexedToken::error(kind));
2259 }
2260
2261 let kind = if word.single_segment().is_some() {
2262 TokenKind::QuotedWord
2263 } else {
2264 TokenKind::Word
2265 };
2266
2267 Some(LexedToken::with_word_payload(kind, word))
2268 }
2269
    /// Reads a single `"..."` segment (no continuation handling).
    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(false)
    }
2273
    /// Reads a single `$"..."` segment (no continuation handling).
    fn read_dollar_double_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(true)
    }
2279
    /// Reads a `"..."` (or, when `dollar`, `$"..."`) segment; the cursor
    /// must be on the opening `"` (or the `$`).
    ///
    /// Operates in "simple" mode while no `\`, `$`, or backtick has been
    /// seen: there the source fast path skips ahead using
    /// `find_double_quote_special`, and the final content can be borrowed
    /// from the input. The first construct that requires rewriting
    /// (escapes, backticks, certain parameter expansions) switches to an
    /// owned capture buffer and/or clears `borrowable`.
    ///
    /// Escape handling: `\` before newline is a line continuation; `\$`,
    /// `\"`, `\\`, and `` \` `` escape the character (a NUL marker byte is
    /// captured before `$`, `\`, and backtick so later stages see them as
    /// escaped); any other `\` pair is kept verbatim.
    ///
    /// # Errors
    /// `LexerErrorKind::DoubleQuote` when the closing quote is missing.
    fn read_double_quoted_segment_with_dollar(
        &mut self,
        dollar: bool,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        if dollar {
            debug_assert_eq!(self.peek_char(), Some('$'));
            debug_assert_eq!(self.second_char(), Some('"'));
        } else {
            debug_assert_eq!(self.peek_char(), Some('"'));
        }

        let wrapper_start = self.current_position();
        // Consume the opener: `$"` or `"`.
        if dollar {
            self.consume_ascii_chars(2);
        } else {
            self.consume_ascii_chars(1);
        }
        let content_start = self.current_position();
        let mut content_end = content_start;
        // `simple`: no special character seen yet; `borrowable`: content can
        // still be sliced from the input verbatim.
        let mut simple = self.reinject_buf.is_empty();
        let mut borrowable = self.reinject_buf.is_empty();
        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        let mut closed = false;

        while let Some(ch) = self.peek_char() {
            if simple {
                if self.reinject_buf.is_empty() {
                    // Fast path: jump to the next character that needs
                    // individual handling (`"`, `\`, `$`, or backtick).
                    let rest = self.cursor.rest();
                    match Self::find_double_quote_special(rest) {
                        Some(index) if index > 0 => {
                            self.consume_source_bytes(index);
                            continue;
                        }
                        None => {
                            // No special char at all: unterminated string.
                            self.consume_source_bytes(rest.len());
                            return Err(LexerErrorKind::DoubleQuote);
                        }
                        _ => {}
                    }
                }

                match ch {
                    '"' => {
                        // Closing quote while still simple.
                        content_end = self.current_position();
                        self.consume_ascii_chars(1);
                        closed = true;
                        break;
                    }
                    '\\' | '$' | '`' => {
                        // Leave simple mode; backticks additionally force an
                        // owned capture of everything read so far.
                        simple = false;
                        if ch == '`' {
                            borrowable = false;
                            let capture_end = self.current_position();
                            self.ensure_capture_from_source(
                                &mut content,
                                content_start,
                                capture_end,
                            );
                        }
                    }
                    _ => {
                        self.advance();
                    }
                }
                if simple {
                    continue;
                }
                // Fall through to the detailed handling with the same `ch`.
            }

            match ch {
                '"' => {
                    // Closing quote.
                    if borrowable {
                        content_end = self.current_position();
                    }
                    self.consume_ascii_chars(1);
                    closed = true;
                    break;
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        match next {
                            '\n' => {
                                // Line continuation: drop both characters.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                self.advance();
                            }
                            '$' => {
                                // Escaped `$`: NUL marker + `$`.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                Self::push_capture_char(&mut content, '\x00');
                                Self::push_capture_char(&mut content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // Escaped quote/backslash/backtick; `\` and
                                // the backtick also get the NUL marker.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                if next == '\\' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                if next == '`' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                            _ => {
                                // Any other pair is kept verbatim.
                                Self::push_capture_char(&mut content, '\\');
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                        }
                    }
                }
                '$' => {
                    // Expansion inside the quotes; plain `$name` just passes
                    // the `$` through and lets later iterations capture the
                    // name characters.
                    Self::push_capture_char(&mut content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            // `$((...))` arithmetic expansion.
                            self.read_arithmetic_expansion_into(&mut content);
                        } else {
                            // `$(...)` command substitution.
                            Self::push_capture_char(&mut content, '(');
                            self.advance();
                            self.read_command_subst_into(&mut content);
                        }
                    } else if self.peek_char() == Some('{') {
                        // `${...}` parameter expansion; the reader decides
                        // whether the content remains borrowable.
                        Self::push_capture_char(&mut content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
                    }
                    content_end = self.current_position();
                }
                '`' => {
                    // Backtick substitution: copy through the closing tick,
                    // keeping escape pairs intact.
                    borrowable = false;
                    let capture_end = self.current_position();
                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
                    Self::push_capture_char(&mut content, '`');
                    self.advance();
                    while let Some(c) = self.peek_char() {
                        Self::push_capture_char(&mut content, c);
                        self.advance();
                        if c == '`' {
                            break;
                        }
                        if c == '\\'
                            && let Some(next) = self.peek_char()
                        {
                            Self::push_capture_char(&mut content, next);
                            self.advance();
                        }
                    }
                    content_end = self.current_position();
                }
                _ => {
                    // Ordinary character.
                    Self::push_capture_char(&mut content, ch);
                    self.advance();
                    content_end = self.current_position();
                }
            }
        }

        if !closed {
            return Err(LexerErrorKind::DoubleQuote);
        }

        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if borrowable {
            Ok(LexedWordSegment::borrowed_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                content.unwrap_or_default(),
                content_span,
                wrapper_span,
            ))
        }
    }
2487
    /// Captures a `$(( ... ))` arithmetic expansion verbatim into `content`.
    ///
    /// Both opening parentheses must be pending (debug-asserted). The scan
    /// copies characters until the matching `))`, balancing nested
    /// parentheses; quoted regions and `\x` escape pairs are copied wholesale
    /// so that parentheses inside them cannot affect the balance.
    ///
    /// Returns `true` when the expansion closed, `false` on EOF.
    fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
        debug_assert_eq!(self.peek_char(), Some('('));
        debug_assert_eq!(self.second_char(), Some('('));

        Self::push_capture_char(content, '(');
        self.advance();
        Self::push_capture_char(content, '(');
        self.advance();

        // Both opening parens count toward the balance, so the closing `))`
        // brings this back to zero.
        let mut depth = 2;
        while let Some(c) = self.peek_char() {
            match c {
                // Copy an escape pair without interpreting the second char.
                '\\' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                // Single quotes: verbatim copy up to the closing quote
                // (no escapes recognized inside).
                '\'' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                // Double quotes: escape-aware verbatim copy so an escaped `"`
                // does not terminate the quoted region.
                '"' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                // Backticks: same escape-aware verbatim copy as double quotes.
                '`' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '(' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth += 1;
                }
                ')' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    depth -= 1;
                    if depth == 0 {
                        return true;
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the expansion closed.
        false
    }
2577
    /// Captures legacy `$[ ... ]` arithmetic verbatim into `content`.
    ///
    /// Called with the opening `[` already consumed (bracket depth starts at
    /// one — note there is no assertion, unlike the `$((` reader). Balances
    /// nested brackets, copies quoted/escaped regions wholesale, and descends
    /// into nested `$(( ))`, `$( )`, `${ }`, and `$[ ]` forms so their
    /// delimiters cannot disturb the bracket balance.
    ///
    /// Returns `true` when the closing `]` was found, `false` on EOF or when
    /// a nested expansion failed to close.
    fn read_legacy_arithmetic_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut bracket_depth = 1;

        while let Some(c) = self.peek_char() {
            match c {
                // Copy an escape pair without interpreting the second char.
                '\\' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        Self::push_capture_char(content, next);
                        self.advance();
                    }
                }
                // Single quotes: verbatim copy up to the closing quote.
                '\'' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if quoted == '\'' {
                            break;
                        }
                    }
                }
                // Double quotes: escape-aware verbatim copy.
                '"' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '"' => break,
                            _ => {}
                        }
                    }
                }
                // Backticks: escape-aware verbatim copy.
                '`' => {
                    let mut escaped = false;
                    Self::push_capture_char(content, c);
                    self.advance();
                    while let Some(quoted) = self.peek_char() {
                        Self::push_capture_char(content, quoted);
                        self.advance();
                        if escaped {
                            escaped = false;
                            continue;
                        }
                        match quoted {
                            '\\' => escaped = true,
                            '`' => break,
                            _ => {}
                        }
                    }
                }
                '[' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth += 1;
                }
                ']' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    bracket_depth -= 1;
                    if bracket_depth == 0 {
                        return true;
                    }
                }
                // Nested `$`-introduced expansions are delegated so their
                // internal brackets/parens don't leak into our balance.
                '$' => {
                    Self::push_capture_char(content, c);
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                return false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            if !self.read_command_subst_into(content) {
                                return false;
                            }
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        if !self.read_param_expansion_into(content, segment_start) {
                            return false;
                        }
                    } else if self.peek_char() == Some('[') {
                        Self::push_capture_char(content, '[');
                        self.advance();
                        if !self.read_legacy_arithmetic_into(content, segment_start) {
                            return false;
                        }
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the closing `]`.
        false
    }
2693
    /// Captures a `$( ... )` command substitution verbatim into `content`.
    ///
    /// Entry point that starts the structure-aware scanner at substitution
    /// depth 0; see `read_command_subst_into_depth` for the scanning rules.
    /// Returns `true` when the closing `)` was found, `false` on EOF.
    fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
        self.read_command_subst_into_depth(content, 0)
    }
2700
2701 fn flush_command_subst_keyword(
2702 current_word: &mut String,
2703 pending_case_headers: &mut usize,
2704 case_clause_depths: &mut SmallVec<[usize; 4]>,
2705 depth: usize,
2706 word_started_at_command_start: &mut bool,
2707 ) {
2708 if current_word.is_empty() {
2709 *word_started_at_command_start = false;
2710 return;
2711 }
2712
2713 match current_word.as_str() {
2714 "case" if *word_started_at_command_start => *pending_case_headers += 1,
2715 "in" if *pending_case_headers > 0 => {
2716 *pending_case_headers -= 1;
2717 case_clause_depths.push(depth);
2718 }
2719 "esac" if *word_started_at_command_start => {
2720 case_clause_depths.pop();
2721 }
2722 _ => {}
2723 }
2724
2725 current_word.clear();
2726 *word_started_at_command_start = false;
2727 }
2728
2729 fn read_command_subst_heredoc_delimiter_into(
2730 &mut self,
2731 content: &mut Option<String>,
2732 ) -> Option<String> {
2733 while let Some(ch) = self.peek_char() {
2734 if !matches!(ch, ' ' | '\t') {
2735 break;
2736 }
2737 Self::push_capture_char(content, ch);
2738 self.advance();
2739 }
2740
2741 let mut cooked = String::new();
2742 let mut in_single = false;
2743 let mut in_double = false;
2744 let mut escaped = false;
2745 let mut saw_any = false;
2746
2747 while let Some(ch) = self.peek_char() {
2748 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2749 break;
2750 }
2751
2752 saw_any = true;
2753 Self::push_capture_char(content, ch);
2754 self.advance();
2755
2756 if escaped {
2757 cooked.push(ch);
2758 escaped = false;
2759 continue;
2760 }
2761
2762 match ch {
2763 '\\' if !in_single => escaped = true,
2764 '\'' if !in_double => in_single = !in_single,
2765 '"' if !in_single => in_double = !in_double,
2766 _ => cooked.push(ch),
2767 }
2768 }
2769
2770 saw_any.then_some(cooked)
2771 }
2772
2773 fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2774 Self::push_capture_char(content, '`');
2775 self.advance();
2776 while let Some(ch) = self.peek_char() {
2777 Self::push_capture_char(content, ch);
2778 self.advance();
2779 if ch == '\\' {
2780 if let Some(esc) = self.peek_char() {
2781 Self::push_capture_char(content, esc);
2782 self.advance();
2783 }
2784 continue;
2785 }
2786 if ch == '`' {
2787 break;
2788 }
2789 }
2790 }
2791
2792 fn read_command_subst_pending_heredoc_into(
2793 &mut self,
2794 content: &mut Option<String>,
2795 delimiter: &str,
2796 strip_tabs: bool,
2797 ) -> bool {
2798 loop {
2799 let mut line = String::new();
2800 let mut saw_newline = false;
2801
2802 while let Some(ch) = self.peek_char() {
2803 self.advance();
2804 if ch == '\n' {
2805 saw_newline = true;
2806 break;
2807 }
2808 line.push(ch);
2809 }
2810
2811 Self::push_capture_str(content, &line);
2812 if saw_newline {
2813 Self::push_capture_char(content, '\n');
2814 }
2815
2816 if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2817 return true;
2818 }
2819 }
2820 }
2821
    /// Captures a `$( ... )` command substitution verbatim into `content`,
    /// scanning until the `)` that actually closes the substitution.
    ///
    /// Raw paren counting is not enough: `)` may legitimately appear inside
    /// quotes, comments, heredoc bodies, and `case` patterns, so the scanner
    /// tracks just enough shell structure (quote state, heredoc delimiters,
    /// `case … in … esac` clause depths, redirection targets) to know which
    /// `)` characters do not count. `subst_depth` limits recursion through
    /// nested `$( ... )` forms.
    ///
    /// Returns `true` when the closing `)` was found, `false` on EOF.
    fn read_command_subst_into_depth(
        &mut self,
        content: &mut Option<String>,
        subst_depth: usize,
    ) -> bool {
        // At the nesting limit, degrade to bare parenthesis balancing with no
        // quote/comment awareness.
        // NOTE(review): in this fallback the scanned characters are consumed
        // but NOT pushed into `content` (only the final `)` is) — the capture
        // is truncated. Presumably acceptable at pathological depth; confirm.
        if subst_depth >= self.max_subst_depth {
            let mut depth = 1;
            while let Some(c) = self.peek_char() {
                self.advance();
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            Self::push_capture_char(content, ')');
                            return true;
                        }
                    }
                    _ => {}
                }
            }
            return false;
        }

        let mut depth = 1;
        // Heredocs announced on the current line; their bodies are consumed
        // after the next newline.
        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
        // `case` bookkeeping: a `)` that terminates a case pattern must not be
        // counted as a closing parenthesis.
        let mut pending_case_headers = 0usize;
        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
        // Bare word currently being accumulated, used only to spot the
        // `case` / `in` / `esac` keywords and numeric redirection fds.
        let mut current_word = String::with_capacity(16);
        let mut at_command_start = true;
        let mut expecting_redirection_target = false;
        let mut current_word_started_at_command_start = false;
        while let Some(c) = self.peek_char() {
            match c {
                // Comment: copy through end of line, then satisfy any heredocs
                // whose bodies begin after that newline.
                '#' if !self.should_treat_hash_as_word_char() => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '#');
                    self.advance();
                    while let Some(comment_ch) = self.peek_char() {
                        Self::push_capture_char(content, comment_ch);
                        self.advance();
                        if comment_ch == '\n' {
                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                                if !self.read_command_subst_pending_heredoc_into(
                                    content, &delimiter, strip_tabs,
                                ) {
                                    return false;
                                }
                            }
                            at_command_start = true;
                            expecting_redirection_target = false;
                            break;
                        }
                    }
                }
                '(' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    depth += 1;
                    Self::push_capture_char(content, c);
                    self.advance();
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                ')' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    // A `)` at the depth of an open `case … in` clause ends a
                    // case pattern, not a parenthesis group.
                    if case_clause_depths
                        .last()
                        .is_some_and(|case_depth| *case_depth == depth)
                    {
                        Self::push_capture_char(content, ')');
                        self.advance();
                        at_command_start = true;
                        expecting_redirection_target = false;
                        continue;
                    }
                    depth -= 1;
                    self.advance();
                    if depth == 0 {
                        Self::push_capture_char(content, ')');
                        return true;
                    }
                    Self::push_capture_char(content, c);
                    at_command_start = false;
                    expecting_redirection_target = false;
                }
                // Double-quoted string: copy verbatim; nested `$( ... )` and
                // `$(( ... ))` inside the quotes are still scanned so their
                // parens can't unbalance us.
                '"' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '"');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        match qc {
                            '"' => {
                                Self::push_capture_char(content, '"');
                                self.advance();
                                break;
                            }
                            '\\' => {
                                Self::push_capture_char(content, '\\');
                                self.advance();
                                if let Some(esc) = self.peek_char() {
                                    Self::push_capture_char(content, esc);
                                    self.advance();
                                }
                            }
                            '$' => {
                                Self::push_capture_char(content, '$');
                                self.advance();
                                if self.peek_char() == Some('(') {
                                    if self.second_char() == Some('(') {
                                        if !self.read_arithmetic_expansion_into(content) {
                                            return false;
                                        }
                                    } else {
                                        Self::push_capture_char(content, '(');
                                        self.advance();
                                        if !self
                                            .read_command_subst_into_depth(content, subst_depth + 1)
                                        {
                                            return false;
                                        }
                                    }
                                }
                            }
                            _ => {
                                Self::push_capture_char(content, qc);
                                self.advance();
                            }
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Single-quoted string: verbatim up to the closing quote.
                '\'' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backtick substitution: delegated verbatim copy.
                '`' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    self.read_command_subst_backtick_segment_into(content);
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // ANSI-C quoting `$'...'`: verbatim, honoring `\x` pairs.
                '$' if self.second_char() == Some('\'') => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '$');
                    self.advance();
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\\' {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            continue;
                        }
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Escape pair: copy both characters untouched.
                '\\' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\\');
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        Self::push_capture_char(content, esc);
                        self.advance();
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `<<` heredoc operator (or `<<<` herestring, `<<-` strip-tab
                // variant). A purely numeric word before it is an fd prefix,
                // not a command, so command-start state is restored.
                '<' if self.second_char() == Some('<') => {
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }

                    Self::push_capture_char(content, '<');
                    self.advance();
                    Self::push_capture_char(content, '<');
                    self.advance();

                    if self.peek_char() == Some('<') {
                        Self::push_capture_char(content, '<');
                        self.advance();
                        expecting_redirection_target = true;
                        continue;
                    }

                    let strip_tabs = if self.peek_char() == Some('-') {
                        Self::push_capture_char(content, '-');
                        self.advance();
                        true
                    } else {
                        false
                    };

                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
                    {
                        pending_heredocs.push((delimiter, strip_tabs));
                        expecting_redirection_target = false;
                    } else {
                        // No delimiter yet; the next word is the target.
                        expecting_redirection_target = true;
                    }
                }
                // Plain redirection operators.
                '>' | '<' => {
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                    expecting_redirection_target = true;
                }
                // Newline: consume any heredoc bodies announced on this line.
                '\n' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    Self::push_capture_char(content, '\n');
                    self.advance();
                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                        if !self.read_command_subst_pending_heredoc_into(
                            content, &delimiter, strip_tabs,
                        ) {
                            return false;
                        }
                    }
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                _ => {
                    if c.is_ascii_alphanumeric() || c == '_' {
                        // Accumulate a keyword candidate; remember whether it
                        // began in command position.
                        if current_word.is_empty()
                            && !expecting_redirection_target
                            && at_command_start
                        {
                            current_word_started_at_command_start = true;
                            at_command_start = false;
                        }
                        current_word.push(c);
                    } else {
                        let had_word = !current_word.is_empty();
                        Self::flush_command_subst_keyword(
                            &mut current_word,
                            &mut pending_case_headers,
                            &mut case_clause_depths,
                            depth,
                            &mut current_word_started_at_command_start,
                        );
                        if had_word && expecting_redirection_target {
                            expecting_redirection_target = false;
                        }
                        match c {
                            ' ' | '\t' => {}
                            // Command separators reset to command position.
                            ';' | '|' | '&' => {
                                at_command_start = true;
                                expecting_redirection_target = false;
                            }
                            _ => {
                                if !expecting_redirection_target {
                                    at_command_start = false;
                                }
                            }
                        }
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // EOF before the substitution closed.
        false
    }
3222
    /// Captures a `${ ... }` parameter expansion into `content`, starting
    /// just after the opening `${`.
    ///
    /// Returns `true` while the captured text still matches the raw source
    /// byte-for-byte (so the caller may borrow the source slice); `false`
    /// once any cooked rewriting occurred (e.g. `\$`, `\"`, `\\`, or
    /// `` \` `` being shortened).
    ///
    /// Balances nested `${ ... }`, tracks single/double quote state, and
    /// counts unmatched literal `{`s so a `}` belonging to a literal brace
    /// does not close the expansion prematurely.
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut borrowable = true;
        let mut depth = 1;
        // Unmatched literal `{`s seen inside the expansion.
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Depth at which the current double-quoted region was opened; only
        // expansions opened deeper than this may be closed inside the quotes.
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            if in_single {
                match c {
                    '\\' => {
                        let escape_start = self.current_position();
                        if self.second_char() == Some('"') {
                            // `\"` inside single quotes is cooked to a bare
                            // `"`, so the capture diverges from the source and
                            // can no longer be borrowed.
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                // Candidate closer. Inside double quotes only expansions that
                // opened after the quote may close here.
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    // At top level with pending literal `{`s: if another
                    // top-level closer exists later, treat this `}` as closing
                    // a literal brace rather than the expansion itself.
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                '{' if !in_single && !in_double => {
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    // Single quotes are only significant outside double quotes.
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                // Escaped `$` is cooked to a NUL sentinel plus
                                // `$` — presumably decoded by a later stage;
                                // TODO(review): confirm the consumer.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // Backslash dropped; capture diverges from the
                                // raw source.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // `\}` stays escaped in the capture but
                                // cancels one pending literal brace.
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at EOF is kept as-is.
                        Self::push_capture_char(content, '\\');
                    }
                }
                // Nested expansions are delegated so their delimiters cannot
                // disturb the brace balance.
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            // NOTE(review): the success flag is discarded —
                            // an unterminated nested command substitution is
                            // tolerated here; confirm this is intentional.
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3373
    /// Looks ahead (without consuming input) for a `}` that would close a
    /// `${ ... }` currently open at `target_depth`.
    ///
    /// Used to disambiguate a `}` seen while unmatched literal `{`s are
    /// pending: if a later closer at `target_depth` exists, the current `}`
    /// can be attributed to a literal brace instead. The scan tracks single
    /// and double quotes, backslash escapes, and nested `${` openers, and
    /// gives up at an unquoted newline while still at `target_depth`.
    fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
        let mut chars = self.lookahead_chars().peekable();
        let mut depth = target_depth;
        let mut in_single = false;
        let mut in_double = false;
        let mut double_quote_depth = 0usize;

        while let Some(ch) = chars.next() {
            if in_single {
                match ch {
                    '\'' => in_single = false,
                    // `\"` inside single quotes is skipped as a pair,
                    // mirroring how `read_param_expansion_into` cooks it.
                    '\\' if chars.peek() == Some(&'"') => {
                        chars.next();
                    }
                    '\\' => {}
                    _ => {}
                }
                continue;
            }

            if in_double {
                match ch {
                    '"' => {
                        in_double = false;
                        double_quote_depth = 0;
                    }
                    '\\' => {
                        chars.next();
                    }
                    '$' if chars.peek() == Some(&'{') => {
                        chars.next();
                        depth += 1;
                    }
                    // Only expansions opened after the quote may close here.
                    '}' if depth > double_quote_depth => {
                        depth -= 1;
                    }
                    _ => {}
                }
                continue;
            }

            match ch {
                '\n' if depth == target_depth => return false,
                '\'' => in_single = true,
                '"' => {
                    in_double = true;
                    double_quote_depth = depth;
                }
                '\\' => {
                    chars.next();
                }
                '$' if chars.peek() == Some(&'{') => {
                    chars.next();
                    depth += 1;
                }
                '}' => {
                    if depth == target_depth {
                        return true;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        false
    }
3441
3442 fn looks_like_brace_expansion(&self) -> bool {
3448 const MAX_LOOKAHEAD: usize = 10_000;
3449
3450 let mut chars = self.lookahead_chars();
3451
3452 if chars.next() != Some('{') {
3454 return false;
3455 }
3456
3457 let mut depth = 1;
3458 let mut has_comma = false;
3459 let mut has_dot_dot = false;
3460 let mut prev_char = None;
3461 let mut scanned = 0usize;
3462
3463 for ch in chars {
3464 scanned += 1;
3465 if scanned > MAX_LOOKAHEAD {
3466 return false;
3467 }
3468 match ch {
3469 '{' => depth += 1,
3470 '}' => {
3471 depth -= 1;
3472 if depth == 0 {
3473 return has_comma || has_dot_dot;
3475 }
3476 }
3477 ',' if depth == 1 => has_comma = true,
3478 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3479 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3481 _ => {}
3482 }
3483 prev_char = Some(ch);
3484 }
3485
3486 false
3487 }
3488
3489 fn looks_like_mid_word_brace_segment(&self) -> bool {
3492 const MAX_LOOKAHEAD: usize = 10_000;
3493
3494 let mut chars = self.lookahead_chars();
3495 if chars.next() != Some('{') {
3496 return false;
3497 }
3498
3499 let mut brace_depth = 1;
3500 let mut paren_depth = 0usize;
3501 let mut escaped = false;
3502 let mut in_single = false;
3503 let mut in_double = false;
3504 let mut in_backtick = false;
3505 let mut prev_char = None;
3506 let mut scanned = 0usize;
3507
3508 for ch in chars {
3509 scanned += 1;
3510 if scanned > MAX_LOOKAHEAD {
3511 return false;
3512 }
3513
3514 if !in_single
3515 && !in_double
3516 && !in_backtick
3517 && !escaped
3518 && brace_depth == 1
3519 && paren_depth == 0
3520 && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3521 {
3522 return false;
3523 }
3524
3525 if escaped {
3526 escaped = false;
3527 prev_char = Some(ch);
3528 continue;
3529 }
3530
3531 match ch {
3532 '\\' => escaped = true,
3533 '\'' if !in_double && !in_backtick => in_single = !in_single,
3534 '"' if !in_single && !in_backtick => in_double = !in_double,
3535 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3536 '(' if !in_single
3537 && !in_double
3538 && !in_backtick
3539 && (paren_depth > 0 || prev_char == Some('$')) =>
3540 {
3541 paren_depth += 1
3542 }
3543 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3544 paren_depth -= 1
3545 }
3546 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3547 '}' => {
3548 brace_depth -= 1;
3549 if brace_depth == 0 {
3550 return true;
3551 }
3552 }
3553 _ => {}
3554 }
3555
3556 prev_char = Some(ch);
3557 }
3558
3559 false
3560 }
3561
3562 fn is_brace_group_start(&self) -> bool {
3564 let mut chars = self.lookahead_chars();
3565 if chars.next() != Some('{') {
3567 return false;
3568 }
3569 matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3571 }
3572
3573 fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3576 const MAX_LOOKAHEAD: usize = 10_000;
3577
3578 let mut chars = self.lookahead_chars();
3579 let mut depth = 1;
3580 let mut has_comma = false;
3581 let mut has_dot_dot = false;
3582 let mut prev_char = None;
3583 let mut scanned = 0usize;
3584
3585 for ch in chars.by_ref() {
3586 scanned += 1;
3587 if scanned > MAX_LOOKAHEAD {
3588 return false;
3589 }
3590 match ch {
3591 '{' => depth += 1,
3592 '}' => {
3593 depth -= 1;
3594 if depth == 0 {
3595 return has_comma || has_dot_dot;
3596 }
3597 }
3598 ',' if depth == 1 => has_comma = true,
3599 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3600 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3601 _ => {}
3602 }
3603 prev_char = Some(ch);
3604 }
3605
3606 false
3607 }
3608
    /// Reads a `{ ... }` run that is *not* a brace expansion as one owned
    /// literal `Word` token: the balanced brace group plus any word
    /// characters glued after the closing brace. Returns `None` when the
    /// cursor is not on `{`.
    fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
        let mut word = String::with_capacity(16);

        if let Some('{') = self.peek_char() {
            word.push('{');
            self.advance();
        } else {
            return None;
        }

        // Copy the balanced brace group, including both braces.
        let mut depth = 1;
        while let Some(ch) = self.peek_char() {
            word.push(ch);
            self.advance();
            match ch {
                '{' => depth += 1,
                '}' => {
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                _ => {}
            }
        }

        // Append word characters glued to the closing brace. With no
        // reinjected characters pending we can bulk-consume straight from the
        // cursor; otherwise fall back to char-by-char so the reinject buffer
        // is honored.
        while let Some(ch) = self.peek_char() {
            if Self::is_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    let chunk = self.cursor.eat_while(Self::is_word_char);
                    word.push_str(chunk);
                    self.advance_scanned_source_bytes(chunk.len());
                } else {
                    word.push(ch);
                    self.advance();
                }
            } else {
                break;
            }
        }

        Some(LexedToken::owned_word(TokenKind::Word, word))
    }
3656
3657 fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3659 let mut word = String::with_capacity(16);
3660
3661 if let Some('{') = self.peek_char() {
3663 word.push('{');
3664 self.advance();
3665 } else {
3666 return None;
3667 }
3668
3669 let mut depth = 1;
3671 while let Some(ch) = self.peek_char() {
3672 word.push(ch);
3673 self.advance();
3674 match ch {
3675 '{' => depth += 1,
3676 '}' => {
3677 depth -= 1;
3678 if depth == 0 {
3679 break;
3680 }
3681 }
3682 _ => {}
3683 }
3684 }
3685
3686 while let Some(ch) = self.peek_char() {
3688 if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3689 if ch == '{' {
3690 word.push(ch);
3692 self.advance();
3693 let mut inner_depth = 1;
3694 while let Some(c) = self.peek_char() {
3695 word.push(c);
3696 self.advance();
3697 match c {
3698 '{' => inner_depth += 1,
3699 '}' => {
3700 inner_depth -= 1;
3701 if inner_depth == 0 {
3702 break;
3703 }
3704 }
3705 _ => {}
3706 }
3707 }
3708 } else {
3709 word.push(ch);
3710 self.advance();
3711 }
3712 } else {
3713 break;
3714 }
3715 }
3716
3717 Some(LexedToken::owned_word(TokenKind::Word, word))
3718 }
3719
3720 fn looks_like_assoc_assign(&self) -> bool {
3724 let mut chars = self.lookahead_chars();
3725 if chars.next() != Some('(') {
3727 return false;
3728 }
3729 for ch in chars {
3731 match ch {
3732 ' ' | '\t' => continue,
3733 '[' => return true,
3734 _ => return false,
3735 }
3736 }
3737 false
3738 }
3739
3740 fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3741 text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3742 }
3743
3744 fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3745 word.segments().any(|segment| {
3746 matches!(
3747 segment.kind(),
3748 LexedWordSegmentKind::SingleQuoted
3749 | LexedWordSegmentKind::DollarSingleQuoted
3750 | LexedWordSegmentKind::DoubleQuoted
3751 | LexedWordSegmentKind::DollarDoubleQuoted
3752 )
3753 }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3754 }
3755
3756 fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3757 text.contains(['*', '?'])
3758 || text.ends_with('}') && text.contains("${")
3759 || text.ends_with(']')
3760 && text
3761 .rfind('[')
3762 .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3763 }
3764
3765 fn is_word_char(ch: char) -> bool {
3766 !matches!(
3767 ch,
3768 ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3769 )
3770 }
3771
3772 const fn is_ascii_word_byte(byte: u8) -> bool {
3773 !matches!(
3774 byte,
3775 b' ' | b'\t'
3776 | b'\n'
3777 | b';'
3778 | b'|'
3779 | b'&'
3780 | b'>'
3781 | b'<'
3782 | b'('
3783 | b')'
3784 | b'{'
3785 | b'}'
3786 | b'\''
3787 | b'"'
3788 )
3789 }
3790
3791 const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3792 Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3793 }
3794
3795 fn is_plain_word_char(ch: char) -> bool {
3796 Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3797 }
3798
3799 pub fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3801 let mut content = String::with_capacity(64);
3802 let mut current_line = String::with_capacity(64);
3803
3804 let mut rest_of_line = String::with_capacity(32);
3811 let rest_of_line_start = self.current_position();
3812 let mut in_double_quote = false;
3813 let mut in_single_quote = false;
3814 let mut in_comment = false;
3815 let mut saw_non_whitespace_tail = false;
3816 let mut consecutive_backslashes = 0usize;
3817 let mut previous_tail_char = None;
3818 while let Some(ch) = self.peek_char() {
3819 self.advance();
3820 if in_comment {
3821 if ch == '\n' {
3822 break;
3823 }
3824 rest_of_line.push(ch);
3825 previous_tail_char = Some(ch);
3826 continue;
3827 }
3828 if ch == '#'
3829 && !in_single_quote
3830 && !in_double_quote
3831 && self.comments_enabled()
3832 && heredoc_tail_hash_starts_comment(previous_tail_char)
3833 {
3834 in_comment = true;
3835 rest_of_line.push(ch);
3836 previous_tail_char = Some(ch);
3837 consecutive_backslashes = 0;
3838 continue;
3839 }
3840 let backslash_continues_line = ch == '\\'
3841 && !in_single_quote
3842 && self.peek_char() == Some('\n')
3843 && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3844 && consecutive_backslashes.is_multiple_of(2);
3845 if backslash_continues_line {
3846 rest_of_line.push(ch);
3847 rest_of_line.push('\n');
3848 self.advance();
3849 consecutive_backslashes = 0;
3850 continue;
3851 }
3852 if ch == '\n' && !in_double_quote && !in_single_quote {
3853 break;
3854 }
3855 if ch == '"' && !in_single_quote {
3856 in_double_quote = !in_double_quote;
3857 } else if ch == '\'' && !in_double_quote {
3858 in_single_quote = !in_single_quote;
3859 } else if ch == '\\' && in_double_quote {
3860 rest_of_line.push(ch);
3862 if let Some(next) = self.peek_char() {
3863 rest_of_line.push(next);
3864 self.advance();
3865 }
3866 continue;
3867 }
3868 rest_of_line.push(ch);
3869 if !ch.is_whitespace() {
3870 saw_non_whitespace_tail = true;
3871 }
3872 if ch == '\\' && !in_single_quote {
3873 consecutive_backslashes += 1;
3874 } else {
3875 consecutive_backslashes = 0;
3876 }
3877 previous_tail_char = Some(ch);
3878 }
3879
3880 self.sync_offset_to_cursor();
3884 let content_start = self.current_position();
3885 let mut current_line_start = content_start;
3886 let content_end;
3887
3888 loop {
3890 if self.reinject_buf.is_empty() {
3891 self.sync_offset_to_cursor();
3897 let rest = self.cursor.rest();
3898 if rest.is_empty() {
3899 content_end = self.current_position();
3900 break;
3901 }
3902
3903 let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3904 let line = &rest[..line_len];
3905 let has_newline = line_len < rest.len();
3906
3907 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3908 content_end = current_line_start;
3909 self.consume_source_bytes(line_len);
3910 if has_newline {
3911 self.consume_ascii_chars(1);
3912 }
3913 break;
3914 }
3915
3916 content.push_str(line);
3917 self.consume_source_bytes(line_len);
3918
3919 if has_newline {
3920 self.consume_ascii_chars(1);
3921 content.push('\n');
3922 current_line_start = self.current_position();
3923 continue;
3924 }
3925
3926 content_end = self.current_position();
3927 break;
3928 }
3929
3930 match self.peek_char() {
3931 Some('\n') => {
3932 self.advance();
3933 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
3935 content_end = current_line_start;
3936 break;
3937 }
3938 content.push_str(¤t_line);
3939 content.push('\n');
3940 current_line.clear();
3941 current_line_start = self.current_position();
3942 }
3943 Some(ch) => {
3944 current_line.push(ch);
3945 self.advance();
3946 }
3947 None => {
3948 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
3950 content_end = current_line_start;
3951 break;
3952 }
3953 if !current_line.is_empty() {
3954 content.push_str(¤t_line);
3955 }
3956 content_end = self.current_position();
3957 break;
3958 }
3959 }
3960 }
3961
3962 let post_heredoc_offset = self.offset;
3967 self.offset = rest_of_line_start.offset;
3968 for ch in rest_of_line.chars() {
3969 self.reinject_buf.push_back(ch);
3970 }
3971 self.reinject_buf.push_back('\n');
3972 self.reinject_resume_offset = Some(post_heredoc_offset);
3973
3974 HeredocRead {
3975 content,
3976 content_span: Span::from_positions(content_start, content_end),
3977 }
3978 }
3979
3980 fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
3981 let mut chars = self.cursor.rest().chars();
3982 if chars.next() != Some('\n') {
3983 return false;
3984 }
3985
3986 for ch in chars {
3987 if matches!(ch, ' ' | '\t') {
3988 continue;
3989 }
3990 if ch == '\n' {
3991 return false;
3992 }
3993 return matches!(ch, '|' | '&' | ';' | '<' | '>')
3994 || (ch == '#' && self.comments_enabled());
3995 }
3996
3997 false
3998 }
3999}
4000
/// Checks whether a heredoc body line terminates the heredoc: after optional
/// leading tabs (only honored with `<<-`/`strip_tabs`), the line must be the
/// delimiter itself, optionally followed by blanks.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    candidate
        .strip_prefix(delimiter)
        .is_some_and(|tail| tail.chars().all(|ch| ch == ' ' || ch == '\t'))
}
4018
/// Whether a `#` in the heredoc introducer tail begins a comment: true at the
/// very start of the tail, or directly after whitespace or a command
/// separator / redirection character.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(prev) => {
            prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')')
        }
    }
}
4024
/// Decodes the character beginning at byte offset `index`, returning it
/// together with the offset just past it. Yields `None` when `index` is out
/// of range or does not fall on a character boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|rest| rest.chars().next())
        .map(|ch| (ch, index + ch.len_utf8()))
}
4029
/// Returns `true` when `prefix` opens more unquoted, unescaped `((` pairs
/// than it closes with `))` — i.e. the end of `prefix` is inside an
/// arithmetic/double-paren construct.
fn line_has_unclosed_double_paren(prefix: &str) -> bool {
    let mut chars = prefix.chars().peekable();
    let mut open = 0usize;
    let mut single = false;
    let mut double = false;
    let mut backtick = false;
    let mut pending_escape = false;

    while let Some(ch) = chars.next() {
        let escaped = pending_escape;
        if ch == '\\' && !single {
            // Adjacent backslashes cancel pairwise.
            pending_escape = !pending_escape;
            continue;
        }
        pending_escape = false;

        let unquoted = !single && !double && !backtick && !escaped;
        match ch {
            '\'' if !double && !backtick && !escaped => single = !single,
            '"' if !single && !backtick && !escaped => double = !double,
            '`' if !single && !double && !escaped => backtick = !backtick,
            '(' if unquoted && chars.peek() == Some(&'(') => {
                open += 1;
                // Consume the second `(` so `(((` is not double-counted.
                chars.next();
            }
            ')' if unquoted && chars.peek() == Some(&')') => {
                open = open.saturating_sub(1);
                chars.next();
            }
            _ => {}
        }
    }

    open > 0
}
4079
4080fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4081 let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4082 let prefix = &input[line_start..index];
4083 line_has_unclosed_double_paren(prefix)
4084}
4085
4086fn hash_starts_comment(input: &str, index: usize) -> bool {
4087 if inside_unclosed_double_paren_on_line(input, index) {
4088 return false;
4089 }
4090
4091 let next = &input[index + '#'.len_utf8()..];
4092 input[..index]
4093 .chars()
4094 .next_back()
4095 .is_none_or(|prev| match prev {
4096 '(' => {
4097 let whitespace_index = next.find(char::is_whitespace);
4098 let close_index = next.find(')');
4099
4100 match (whitespace_index, close_index) {
4101 (Some(whitespace), Some(close)) => whitespace < close,
4102 (Some(_), None) | (None, None) => true,
4103 (None, Some(_)) => false,
4104 }
4105 }
4106 _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4107 })
4108}
4109
/// Whether `ch` terminates a heredoc delimiter word. Nothing terminates the
/// word while inside quotes or right after a backslash escape.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }

    ch.is_whitespace() || matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')')
}
4121
4122fn scan_double_quoted_command_substitution_segment(
4123 input: &str,
4124 mut index: usize,
4125 subst_depth: usize,
4126) -> Option<usize> {
4127 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4128 match ch {
4129 '"' => return Some(next_index),
4130 '\\' => {
4131 index = next_index;
4132 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4133 index = escaped_next;
4134 }
4135 }
4136 '$' if input[next_index..].starts_with('{') => {
4137 let consumed = scan_command_subst_parameter_expansion_len(
4138 &input[next_index + '{'.len_utf8()..],
4139 subst_depth,
4140 )?;
4141 index = next_index + '{'.len_utf8() + consumed;
4142 }
4143 '$' if input[next_index..].starts_with('(')
4144 && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4145 {
4146 let consumed = scan_command_substitution_body_len_inner(
4147 &input[next_index + '('.len_utf8()..],
4148 subst_depth + 1,
4149 )?;
4150 index = next_index + '('.len_utf8() + consumed;
4151 }
4152 _ => index = next_index,
4153 }
4154 }
4155
4156 None
4157}
4158
/// Scans a `${...}` parameter expansion starting just after the opening `{`,
/// returning the byte length up to and including the matching `}`, or `None`
/// if the expansion never closes.
///
/// Tracks single/double/backtick quoting, `$'...'` ANSI-C quoting, and
/// backslash escapes; nested `${...}`, `$(...)`, and `<(...)`/`>(...)` are
/// skipped as opaque units so their closing characters cannot terminate this
/// expansion.
fn scan_command_subst_parameter_expansion_len(input: &str, subst_depth: usize) -> Option<usize> {
    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    // `$'...'` quoting; entered only when a quote immediately follows `$`
    // (tracked via `ansi_c_quote_pending`).
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        if ch == '\\' && !in_single {
            // Toggling handles runs of backslashes: pairs cancel out.
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // Skip nested `${...}` / `$(...)` wholesale.
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // Skip process substitutions `<(...)` / `>(...)` wholesale.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                // Unquoted, unescaped `}` closes the expansion.
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4258
4259fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4260 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4261 if !matches!(ch, ' ' | '\t') {
4262 break;
4263 }
4264 index = next_index;
4265 }
4266
4267 let start = index;
4268 let mut cooked = String::new();
4269 let mut in_single = false;
4270 let mut in_double = false;
4271 let mut escaped = false;
4272
4273 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4274 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4275 break;
4276 }
4277
4278 index = next_index;
4279 if escaped {
4280 cooked.push(ch);
4281 escaped = false;
4282 continue;
4283 }
4284
4285 match ch {
4286 '\\' if !in_single => escaped = true,
4287 '\'' if !in_double => in_single = !in_single,
4288 '"' if !in_single => in_double = !in_double,
4289 _ => cooked.push(ch),
4290 }
4291 }
4292
4293 (index > start).then_some((index, cooked))
4294}
4295
4296fn skip_command_subst_pending_heredoc(
4297 input: &str,
4298 mut index: usize,
4299 delimiter: &str,
4300 strip_tabs: bool,
4301) -> usize {
4302 while index <= input.len() {
4303 let rest = &input[index..];
4304 let line_len = rest.find('\n').unwrap_or(rest.len());
4305 let line = &rest[..line_len];
4306 let has_newline = line_len < rest.len();
4307
4308 index += line_len;
4309 if has_newline {
4310 index += '\n'.len_utf8();
4311 }
4312
4313 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4314 return index;
4315 }
4316 }
4317
4318 index
4319}
4320
4321fn scan_command_subst_ansi_c_single_quoted_segment(
4322 input: &str,
4323 quote_index: usize,
4324) -> Option<usize> {
4325 let mut index = quote_index + '\''.len_utf8();
4326
4327 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4328 index = next_index;
4329 if ch == '\\' {
4330 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4331 index = escaped_next;
4332 }
4333 continue;
4334 }
4335
4336 if ch == '\'' {
4337 return Some(index);
4338 }
4339 }
4340
4341 None
4342}
4343
4344fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4345 let mut index = start;
4346
4347 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4348 index = next_index;
4349 if ch == '\\' {
4350 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4351 index = escaped_next;
4352 }
4353 continue;
4354 }
4355
4356 if ch == '`' {
4357 return Some(index);
4358 }
4359 }
4360
4361 None
4362}
4363
4364fn flush_scanned_command_subst_keyword(
4365 current_word: &mut String,
4366 pending_case_headers: &mut usize,
4367 case_clause_depths: &mut SmallVec<[usize; 4]>,
4368 depth: usize,
4369 word_started_at_command_start: &mut bool,
4370) {
4371 if current_word.is_empty() {
4372 *word_started_at_command_start = false;
4373 return;
4374 }
4375
4376 match current_word.as_str() {
4377 "case" if *word_started_at_command_start => *pending_case_headers += 1,
4378 "in" if *pending_case_headers > 0 => {
4379 *pending_case_headers -= 1;
4380 case_clause_depths.push(depth);
4381 }
4382 "esac" if *word_started_at_command_start => {
4383 case_clause_depths.pop();
4384 }
4385 _ => {}
4386 }
4387
4388 current_word.clear();
4389 *word_started_at_command_start = false;
4390}
4391
/// Scans the body of a `$(...)` command substitution starting just after the
/// opening `(`, returning the byte length up to and including the matching
/// `)`, or `None` when the substitution never closes (or the recursion
/// budget `DEFAULT_MAX_SUBST_DEPTH` is exhausted).
///
/// Tracks just enough shell structure — quotes, comments, heredocs, nested
/// substitutions, redirections, and `case` pattern lists — so that a `)`
/// belonging to one of those constructs is not mistaken for the terminator.
fn scan_command_substitution_body_len_inner(input: &str, subst_depth: usize) -> Option<usize> {
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    // Paren nesting level; an unmatched `)` at depth 1 ends the substitution.
    let mut depth = 1;
    // Heredocs whose bodies begin after the next newline: (delimiter, strip_tabs).
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    // `case` keywords seen whose `in` has not yet been consumed.
    let mut pending_case_headers = 0usize;
    // Paren depths at which a `case ... in` clause list is open; a lone `)`
    // at such a depth closes a pattern, not a paren group.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Comment: skip to end of line, then consume any heredoc bodies
            // queued on this line.
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // A `)` that closes a `case` pattern does not change depth.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                if depth == 0 {
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                // Single quotes admit no escapes: skip to the closing quote.
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // `$'...'` ANSI-C quoting.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '>' => {
                // A purely numeric word before `>` is a file descriptor, not
                // a command word (e.g. `2>`).
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            // `<<` — heredoc, here-string (`<<<`), or `<<` inside `(( ... ))`.
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // Inside arithmetic parens `<<` is a shift operator.
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<` is a here-string, not a heredoc.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                // Heredoc bodies queued on the finished line start here.
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(` (but not arithmetic `$((`).
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            _ => {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        ' ' | '\t' => {}
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    None
}
4745
/// Returns the byte length of a `$(...)` command substitution body (starting
/// just after the opening `(`), including the closing `)`, or `None` when
/// the substitution never closes.
pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
    scan_command_substitution_body_len_inner(input, 0)
}
4749
4750#[cfg(test)]
4751mod tests {
4752 use super::*;
4753
    /// Extracts the comparable text of a token for assertions: word-like
    /// tokens yield their cooked word string, comments yield their text
    /// without the leading `#`, and error tokens yield their diagnostic
    /// message. Everything else yields `None`.
    fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
        match token.kind {
            kind if kind.is_word_like() => token.word_string(),
            TokenKind::Comment => token
                .span
                .slice(source)
                .strip_prefix('#')
                .map(str::to_string),
            TokenKind::Error => token
                .error_kind()
                .map(LexerErrorKind::message)
                .map(str::to_string),
            _ => None,
        }
    }

    /// Pulls the next token (comments skipped) and asserts its kind and text.
    fn assert_next_token(
        lexer: &mut Lexer<'_>,
        expected_kind: TokenKind,
        expected_text: Option<&str>,
    ) {
        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, expected_kind);
        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
    }

    /// Like `assert_next_token`, but comments are surfaced as tokens.
    fn assert_next_token_with_comments(
        lexer: &mut Lexer<'_>,
        expected_kind: TokenKind,
        expected_text: Option<&str>,
    ) {
        let token = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(token.kind, expected_kind);
        assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
    }

    /// Lexes `input` to exhaustion and asserts that every non-newline token
    /// starts and ends on the same line.
    fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
        let mut lexer = Lexer::new(input);

        while let Some(token) = lexer.next_lexed_token() {
            if token.kind == TokenKind::Newline {
                continue;
            }

            assert_eq!(
                token.span.start.line, token.span.end.line,
                "token should stay on one line: {:?}",
                token
            );
        }
    }
4805
    // Bare words split on whitespace.
    #[test]
    fn test_simple_words() {
        let mut lexer = Lexer::new("echo hello world");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // Single quotes produce a LiteralWord with the quotes stripped.
    #[test]
    fn test_single_quoted_string() {
        let mut lexer = Lexer::new("echo 'hello world'");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // Double quotes produce a QuotedWord with the quotes stripped.
    #[test]
    fn test_double_quoted_string() {
        let mut lexer = Lexer::new("echo \"hello world\"");

        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
        assert!(lexer.next_lexed_token().is_none());
    }

    // A quoted expansion keeps a span back into the original source text.
    #[test]
    fn test_double_quoted_expansion_token_keeps_source_backing() {
        let source = r#""$bar""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(token.word_text(), Some("$bar"));

        let word = token.word().unwrap();
        let segment = word.single_segment().unwrap();
        assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
        assert_eq!(segment.span().unwrap().slice(source), "$bar");
    }

    // Inner quotes inside `$(...)` must survive inside a quoted word.
    #[test]
    fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
        let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
        );
    }

    // `${...}` inside `$(...)` inside quotes must also be kept verbatim.
    #[test]
    fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
        let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::QuotedWord);
        assert_eq!(
            token.word_text(),
            Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
        );
    }

    // Adjacent plain/double/single segments stay distinct but join into one
    // word.
    #[test]
    fn test_mixed_word_keeps_segment_kinds() {
        let source = r#"foo"bar"'baz'"#;
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);

        let word = token.word().unwrap();
        let segments: Vec<_> = word
            .segments()
            .map(|segment| (segment.kind(), segment.as_str().to_string()))
            .collect();

        assert_eq!(
            segments,
            vec![
                (LexedWordSegmentKind::Plain, "foo".to_string()),
                (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
                (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
            ]
        );
        assert_eq!(word.joined_text(), "foobarbaz");
        assert_eq!(
            word.segments()
                .next()
                .and_then(LexedWordSegment::span)
                .unwrap()
                .slice(source),
            "foo"
        );
    }

    // `<<-` heredocs: tab-indented delimiter line still terminates the body.
    #[test]
    fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
        let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // A `#` right after `;` starts a comment; the `)` inside it is literal.
    #[test]
    fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
        let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf '%s' y"));
        assert!(body.ends_with(')'));
    }

    // `(# ...` with whitespace before `)` is a comment inside a subshell.
    #[test]
    fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
        let source = " (# comment with )\nprintf %s 1,2\n) )\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // Heredoc delimiter followed directly by `|` (no space) still parses.
    #[test]
    fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
        let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("field, direction"));
        assert!(body.ends_with(')'));
    }

    // `)` inside `${x//foo/)}` must not close the substitution.
    #[test]
    fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
        let source = "printf %s ${x//foo/)},1)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("${x//foo/)},1"));
        assert!(body.ends_with(')'));
    }

    // A comment after a case-pattern `)` hides the `esac )` text inside it.
    #[test]
    fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
        let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }

    // zsh glob qualifier `(#b)` — the hash is literal, not a comment.
    #[test]
    fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
        let source = "[[ \"$buf\" == (#b)(*) ]]";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // `(#comment with )` — whitespace precedes `)`, so it IS a comment.
    #[test]
    fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
        let source = "(#comment with )";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }

    // Inside `(( ... ))` a `#` is arithmetic base syntax, not a comment.
    #[test]
    fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
        let source = "(( #c < 256 ))";
        let index = source.find('#').expect("expected hash");

        assert!(!hash_starts_comment(source, index));
    }

    // A quoted `((` does not open an arithmetic context.
    #[test]
    fn test_hash_starts_comment_respects_quoted_double_parens() {
        let source = "printf '((' # comment";
        let index = source.find('#').expect("expected hash");

        assert!(hash_starts_comment(source, index));
    }

    // Same property exercised through the full body scanner.
    #[test]
    fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
        let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";

        let consumed = scan_command_substitution_body_len(source).expect("expected match");
        let body = &source[..consumed];

        assert!(body.contains("printf %s 1,2"));
        assert!(body.ends_with(')'));
    }
5017
5018 #[test]
5019 fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
5020 let source = " (#comment with )\nprintf %s 1,2\n) )\"";
5021
5022 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5023 let body = &source[..consumed];
5024
5025 assert!(body.contains("printf %s 1,2"));
5026 assert!(body.ends_with(')'));
5027 }
5028
5029 #[test]
5030 fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
5031 let source = "((x<<2))\nprintf %s 1,2\n)\"";
5032
5033 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5034 let body = &source[..consumed];
5035
5036 assert!(body.contains("printf %s 1,2"));
5037 assert!(body.ends_with(')'));
5038 }
5039
5040 #[test]
5041 fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
5042 let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";
5043
5044 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5045 let body = &source[..consumed];
5046
5047 assert!(body.contains("printf %s 1,2"));
5048 assert!(body.ends_with("))"));
5049 }
5050
5051 #[test]
5052 fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
5053 let source = "printf %s 1,2; echo case in)\"";
5054
5055 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5056 let body = &source[..consumed];
5057
5058 assert!(body.contains("echo case in"));
5059 assert!(body.ends_with(')'));
5060 }
5061
5062 #[test]
5063 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
5064 let source = "printf %s $'a\\'b'; printf %s 1,2)\"";
5065
5066 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5067 let body = &source[..consumed];
5068
5069 assert!(body.contains("$'a\\'b'"));
5070 assert!(body.contains("printf %s 1,2"));
5071 assert!(body.ends_with(')'));
5072 }
5073
5074 #[test]
5075 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
5076 let source = "printf %s `echo foo)`; printf %s ok)\"";
5077
5078 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5079 let body = &source[..consumed];
5080
5081 assert!(body.contains("`echo foo)`"));
5082 assert!(body.contains("printf %s ok"));
5083 assert!(body.ends_with(')'));
5084 }
5085
5086 #[test]
5087 fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
5088 let source = "printf %s ${x/`echo }`/foo)},1)\"";
5089
5090 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5091 let body = &source[..consumed];
5092
5093 assert!(body.contains("${x/`echo }`/foo)},1"));
5094 assert!(body.ends_with(')'));
5095 }
5096
5097 #[test]
5098 fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5099 {
5100 let source = "printf %s ${x/<(echo })/foo)},1)\"";
5101
5102 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5103 let body = &source[..consumed];
5104
5105 assert!(body.contains("${x/<(echo })/foo)},1"));
5106 assert!(body.ends_with(')'));
5107 }
5108
5109 #[test]
5110 fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5111 let source = "printf %s 1,2; echo case in)";
5112
5113 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5114 let body = &source[..consumed];
5115
5116 assert_eq!(body, source);
5117 }
5118
5119 #[test]
5120 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5121 let source = "printf %s $'a\\'b'; printf %s 1,2)";
5122
5123 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5124 let body = &source[..consumed];
5125
5126 assert_eq!(body, source);
5127 }
5128
5129 #[test]
5130 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5131 let source = "printf %s `echo foo)`; printf %s ok)";
5132
5133 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5134 let body = &source[..consumed];
5135
5136 assert_eq!(body, source);
5137 }
5138
5139 #[test]
5140 fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5141 let source = "echo \"$line\" | cut -d' ' -f2-)";
5142
5143 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5144 let body = &source[..consumed];
5145
5146 assert_eq!(body, source);
5147 }
5148
5149 #[test]
5150 fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5151 let source = "echo \"${@}\" | tr -d '[:space:]')";
5152
5153 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5154 let body = &source[..consumed];
5155
5156 assert_eq!(body, source);
5157 }
5158
5159 #[test]
5160 fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5161 let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5162
5163 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5164 let body = &source[..consumed];
5165
5166 assert_eq!(body, source);
5167 }
5168
5169 #[test]
5170 fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5171 let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5172
5173 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5174 let body = &source[..consumed];
5175
5176 assert_eq!(body, source);
5177 }
5178
5179 #[test]
5180 fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
5181 let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
5182 let mut lexer = Lexer::new(source);
5183
5184 let first = lexer.next_lexed_token().expect("expected first token");
5185 assert!(first.kind.is_word_like(), "{:?}", first.kind);
5186 assert_eq!(first.word_string().as_deref(), Some("echo"));
5187
5188 let second = lexer.next_lexed_token().expect("expected second token");
5189 assert!(second.kind.is_word_like(), "{:?}", second.kind);
5190 assert_eq!(
5191 second.word_string().as_deref(),
5192 Some("$(echo \"$(( $(printf ')') + 1 ))\")")
5193 );
5194 }
5195
5196 #[test]
5197 fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
5198 let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
5199 let start = source.find("$(").expect("expected command substitution") + 2;
5200 let consumed =
5201 scan_command_substitution_body_len(&source[start..]).expect("expected match");
5202 assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
5203 }
5204
5205 #[test]
5206 fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
5207 let source = "echo $(echo $(basename $filename .fuzz))";
5208 let start = source.find("$(").expect("expected command substitution") + 2;
5209 let consumed =
5210 scan_command_substitution_body_len(&source[start..]).expect("expected match");
5211 assert_eq!(
5212 &source[start..start + consumed],
5213 "echo $(basename $filename .fuzz))"
5214 );
5215 }
5216
5217 #[test]
5218 fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
5219 let source = "\n [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n )";
5220 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5221 assert_eq!(consumed, source.len());
5222 }
5223
5224 #[test]
5225 fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
5226 let source = "'foo'bar";
5227 let mut lexer = Lexer::new(source);
5228
5229 let token = lexer.next_lexed_token().unwrap();
5230 assert_eq!(token.kind, TokenKind::LiteralWord);
5231
5232 let word = token.word().unwrap();
5233 let segments: Vec<_> = word
5234 .segments()
5235 .map(|segment| (segment.kind(), segment.as_str().to_string()))
5236 .collect();
5237
5238 assert_eq!(
5239 segments,
5240 vec![
5241 (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
5242 (LexedWordSegmentKind::Plain, "bar".to_string()),
5243 ]
5244 );
5245 assert_eq!(word.joined_text(), "foobar");
5246 assert_eq!(
5247 word.segments()
5248 .nth(1)
5249 .and_then(LexedWordSegment::span)
5250 .unwrap()
5251 .slice(source),
5252 "bar"
5253 );
5254 }
5255
5256 #[test]
5257 fn test_unquoted_command_substitution_word_keeps_source_backing() {
5258 let source = "$(printf hi)";
5259 let mut lexer = Lexer::new(source);
5260
5261 let token = lexer.next_lexed_token().unwrap();
5262 assert_eq!(token.kind, TokenKind::Word);
5263
5264 let word = token.word().unwrap();
5265 let segment = word.single_segment().unwrap();
5266 assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5267 assert_eq!(segment.as_str(), source);
5268 assert_eq!(segment.span().unwrap().slice(source), source);
5269 }
5270
5271 #[test]
5272 fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
5273 let source = "${arr[$RANDOM % ${#arr[@]}]}";
5274 let mut lexer = Lexer::new(source);
5275
5276 let token = lexer.next_lexed_token().unwrap();
5277 assert_eq!(token.kind, TokenKind::Word);
5278
5279 let word = token.word().unwrap();
5280 let segment = word.single_segment().unwrap();
5281 assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5282 assert_eq!(segment.as_str(), source);
5283 assert_eq!(segment.span().unwrap().slice(source), source);
5284 }
5285
5286 #[test]
5287 fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
5288 let source = "\"foo\"$(printf hi)";
5289 let mut lexer = Lexer::new(source);
5290
5291 let token = lexer.next_lexed_token().unwrap();
5292 assert_eq!(token.kind, TokenKind::Word);
5293
5294 let word = token.word().unwrap();
5295 let continuation = word.segments().nth(1).unwrap();
5296 assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
5297 assert_eq!(continuation.as_str(), "$(printf hi)");
5298 assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
5299 }
5300
5301 #[test]
5302 fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
5303 let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
5304 let mut lexer = Lexer::new(source);
5305
5306 let token = lexer.next_lexed_token().unwrap();
5307 assert_eq!(token.kind, TokenKind::QuotedWord);
5308
5309 let word = token.word().unwrap();
5310 let segment = word.single_segment().unwrap();
5311 assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5312 assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
5313 assert_eq!(
5314 segment.span().unwrap().slice(source),
5315 "${arr[$RANDOM % ${#arr[@]}]}"
5316 );
5317 }
5318
5319 #[test]
5320 fn test_ansi_c_control_escape_can_consume_quote() {
5321 let mut lexer = Lexer::new("echo $'\\c''");
5322
5323 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5324 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
5325 assert!(lexer.next_lexed_token().is_none());
5326 }
5327
    #[test]
    fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
        // The `${out_line//'"'/'\"'}` replacement embeds quoted double quotes;
        // the whole assignment must lex as one Word (outer quotes stripped and
        // the quoted `\"` cooked down to `"`), followed only by the newline.
        let source = r#"out_line="${out_line//'"'/'\"'}"
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5342
    #[test]
    fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
        // Regression guard: the `${out_line//'"'/'\"'}` replacement must end at
        // its closing brace so the echo/heredoc lines that follow still lex as
        // their own tokens.
        let source = r#"out_line="${out_line//'"'/'\"'}"
echo "Error: Missing python3!"
cat << 'EOF' > "${pywrapper}"
import os
EOF
"#;
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(r#"out_line=${out_line//'"'/'"'}"#),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("Error: Missing python3!"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        // The quoted delimiter 'EOF' lexes as a LiteralWord.
        assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
    }
5372
    #[test]
    fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
        // `${crypt//\\/\\\\}` (replace one backslash with two) must stay a
        // single Word; cooking halves each escaped backslash pair.
        let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
        let mut lexer = Lexer::new(source);

        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Word);
        assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
        // Cooked text differs from the raw span, so no direct source slice exists.
        assert!(token.source_slice(source).is_none());
        assert_eq!(
            token.word_string().as_deref(),
            Some("crypt=${crypt//\\/\\\\}")
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5389
    #[test]
    fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
        // The `${response#*{...}` trim pattern opens with a literal `{`; its
        // closing `}` must end the expansion so the `fi` line and the function's
        // closing brace still lex as their own tokens.
        let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n if true; then\n txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n fi\n}\n";
        let mut lexer = Lexer::new(source);

        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
        assert_next_token(&mut lexer, TokenKind::Semicolon, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        // Each escaped `\"` in the source cooks down to a plain `"` in the word.
        assert_next_token(
            &mut lexer,
            TokenKind::Word,
            Some(
                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
            ),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert!(lexer.next_lexed_token().is_none());
    }
5426
5427 #[test]
5428 fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5429 let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5430 let mut lexer = Lexer::new(source);
5431
5432 assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5433 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5434 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5435 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5436 assert_next_token(&mut lexer, TokenKind::And, None);
5437 assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5438 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5439 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5440 assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5441 assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5442 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5443 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5444 assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5445 assert_next_token(&mut lexer, TokenKind::Newline, None);
5446 assert!(lexer.next_lexed_token().is_none());
5447 }
5448
5449 #[test]
5450 fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5451 let source = "echo -{$(echo a),b}-\n";
5452 let mut lexer = Lexer::new(source);
5453
5454 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5455 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5456 assert_next_token(&mut lexer, TokenKind::Newline, None);
5457 assert!(lexer.next_lexed_token().is_none());
5458 }
5459
5460 #[test]
5461 fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5462 let source = "echo -{$((1 + 2)),b}-\n";
5463 let mut lexer = Lexer::new(source);
5464
5465 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5466 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5467 assert_next_token(&mut lexer, TokenKind::Newline, None);
5468 assert!(lexer.next_lexed_token().is_none());
5469 }
5470
5471 #[test]
5472 fn test_operators() {
5473 let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5474
5475 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5476 assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5477 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5478 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5479 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5480 assert_next_token(&mut lexer, TokenKind::And, None);
5481 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5482 assert_next_token(&mut lexer, TokenKind::Or, None);
5483 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5484 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5485 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5486 assert_next_token(&mut lexer, TokenKind::Background, None);
5487 assert!(lexer.next_lexed_token().is_none());
5488 }
5489
5490 #[test]
5491 fn test_double_left_bracket_requires_separator() {
5492 let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5493
5494 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5495 assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5496 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5497 assert_next_token(&mut lexer, TokenKind::Newline, None);
5498 assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5499 assert_next_token(&mut lexer, TokenKind::Newline, None);
5500 assert!(lexer.next_lexed_token().is_none());
5501 }
5502
    #[test]
    fn test_redirects() {
        // Walks a command line exercising the redirect operators, separated by
        // single-letter words.
        let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");

        assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
        assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
        // `>>|` lexes as a plain append here.
        assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
        assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
        // `2>|` carries its fd on the token itself and has no token text.
        let token = lexer.next_lexed_token().unwrap();
        assert_eq!(token.kind, TokenKind::Clobber);
        assert_eq!(token.fd_value(), Some(2));
        assert_eq!(token_text(&token, lexer.input), None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
        assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
        assert_next_token(&mut lexer, TokenKind::HereDoc, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
        assert_next_token(&mut lexer, TokenKind::HereString, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
        assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
        assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
    }
5532
5533 #[test]
5534 fn test_comment() {
5535 let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5536
5537 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5538 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5539 assert_next_token(&mut lexer, TokenKind::Newline, None);
5540 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5541 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5542 }
5543
5544 #[test]
5545 fn test_comment_token_with_span() {
5546 let mut lexer = Lexer::new("# lead\necho hi # tail");
5547
5548 let comment = lexer.next_lexed_token_with_comments().unwrap();
5549 assert_eq!(comment.kind, TokenKind::Comment);
5550 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5551 assert_eq!(comment.span.start.line, 1);
5552 assert_eq!(comment.span.start.column, 1);
5553 assert_eq!(comment.span.end.line, 1);
5554 assert_eq!(comment.span.end.column, 7);
5555
5556 assert_next_token(&mut lexer, TokenKind::Newline, None);
5557 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5558 assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5559
5560 let inline = lexer.next_lexed_token_with_comments().unwrap();
5561 assert_eq!(inline.kind, TokenKind::Comment);
5562 assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5563 assert_eq!(inline.span.start.line, 2);
5564 assert_eq!(inline.span.start.column, 9);
5565 }
5566
5567 #[test]
5568 fn test_comment_token_preserves_hash_boundaries() {
5569 let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5570
5571 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5572 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5573 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5574 assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5575 assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5576 assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5577 assert!(lexer.next_lexed_token_with_comments().is_none());
5578 }
5579
5580 #[test]
5581 fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5582 let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5583
5584 let mut saw_comment = false;
5585 while let Some(token) = lexer.next_lexed_token_with_comments() {
5586 if token.kind == TokenKind::Comment {
5587 saw_comment = true;
5588 break;
5589 }
5590 }
5591
5592 assert!(
5593 !saw_comment,
5594 "zsh inline glob controls inside [[ ]] should not lex as comments"
5595 );
5596 }
5597
5598 #[test]
5599 fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5600 let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5601
5602 let mut saw_comment = false;
5603 while let Some(token) = lexer.next_lexed_token_with_comments() {
5604 if token.kind == TokenKind::Comment {
5605 saw_comment = true;
5606 break;
5607 }
5608 }
5609
5610 assert!(
5611 !saw_comment,
5612 "zsh arithmetic char literals inside (( )) should not lex as comments"
5613 );
5614 }
5615
    #[test]
    fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
        // A zsh-style `${1//(#m)...}` replacement whose pattern embeds `"`, `'`,
        // and nested `${(...)...}` / `$(( ))` must still lex as one QuotedWord.
        let mut lexer = Lexer::new(
            "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5636
    #[test]
    fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
        // The quote-heavy replacement word inside the anonymous `() { ... }`
        // body must not swallow the function's closing `}` or the trailing
        // `"$1"` argument.
        let mut lexer = Lexer::new(
            "() {\n builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
        );

        assert_next_token(&mut lexer, TokenKind::LeftParen, None);
        assert_next_token(&mut lexer, TokenKind::RightParen, None);
        assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
        assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
        assert_next_token(
            &mut lexer,
            TokenKind::LiteralWord,
            Some("\\e]133;C;cmdline_url=%s\\a"),
        );
        assert_next_token(
            &mut lexer,
            TokenKind::QuotedWord,
            Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
        );
        assert_next_token(&mut lexer, TokenKind::Newline, None);
        assert_next_token(&mut lexer, TokenKind::RightBrace, None);
        assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
        assert_next_token(&mut lexer, TokenKind::Newline, None);
    }
5664
5665 #[test]
5666 fn test_variable_words() {
5667 let mut lexer = Lexer::new("echo $HOME $USER");
5668
5669 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5670 assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5671 assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5672 assert!(lexer.next_lexed_token().is_none());
5673 }
5674
5675 #[test]
5676 fn test_pipeline_tokens() {
5677 let mut lexer = Lexer::new("echo hello | cat");
5678
5679 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5680 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5681 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5682 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5683 assert!(lexer.next_lexed_token().is_none());
5684 }
5685
5686 #[test]
5687 fn test_read_heredoc() {
5688 let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5690 let content = lexer.read_heredoc("EOF", false);
5691 assert_eq!(content.content, "hello\nworld\n");
5692 }
5693
5694 #[test]
5695 fn test_read_heredoc_single_line() {
5696 let mut lexer = Lexer::new("\ntest\nEOF");
5697 let content = lexer.read_heredoc("EOF", false);
5698 assert_eq!(content.content, "test\n");
5699 }
5700
5701 #[test]
5702 fn test_read_heredoc_full_scenario() {
5703 let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5705
5706 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5708 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5709 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5710
5711 let content = lexer.read_heredoc("EOF", false);
5713 assert_eq!(content.content, "hello\nworld\n");
5714 }
5715
5716 #[test]
5717 fn test_read_heredoc_with_redirect() {
5718 let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5720 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5721 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5722 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5723 let content = lexer.read_heredoc("EOF", false);
5724 assert_eq!(content.content, "hello\n");
5725 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5727 assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5728 }
5729
5730 #[test]
5731 fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5732 let source = "cat <<EOF | grep hello \\\n | sort \\\n > out.txt\nhello\nEOF\n";
5733 let mut lexer = Lexer::new(source);
5734
5735 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5736 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5737 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5738
5739 let heredoc = lexer.read_heredoc("EOF", false);
5740 assert_eq!(heredoc.content, "hello\n");
5741
5742 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5743 assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
5744 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5745 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5746 assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
5747 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5748 assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
5749 }
5750
5751 #[test]
5752 fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
5753 let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
5754 let mut lexer = Lexer::new(source);
5755
5756 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5757 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5758 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5759
5760 let heredoc = lexer.read_heredoc("EOF", false);
5761 assert_eq!(heredoc.content, "1\n2\n3\n");
5762 }
5763
5764 #[test]
5765 fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
5766 let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
5767 let mut lexer = Lexer::new(source);
5768
5769 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5770 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5771 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5772
5773 let heredoc = lexer.read_heredoc("EOF", false);
5774 assert_eq!(heredoc.content, "body\n");
5775 }
5776
5777 #[test]
5778 fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
5779 let source = "cat <<EOF # note \\\nbody\nEOF\n";
5780 let mut lexer = Lexer::new(source);
5781
5782 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5783 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5784 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5785
5786 let heredoc = lexer.read_heredoc("EOF", false);
5787 assert_eq!(heredoc.content, "body\n");
5788 }
5789
5790 #[test]
5791 fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
5792 let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
5793 let mut lexer = Lexer::new(source);
5794
5795 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5796 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5797 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5798 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5799
5800 let heredoc = lexer.read_heredoc("EOF", false);
5801 assert_eq!(heredoc.content, "body\n");
5802
5803 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5804 }
5805
5806 #[test]
5807 fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
5808 let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
5809 let mut lexer = Lexer::new(source);
5810
5811 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5812 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5813 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5814
5815 let heredoc = lexer.read_heredoc("EOF", false);
5816 assert_eq!(heredoc.content, "1\n");
5817
5818 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5819 assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
5820 }
5821
5822 #[test]
5823 fn test_read_heredoc_with_redirect_preserves_following_spans() {
5824 let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
5825 let mut lexer = Lexer::new(source);
5826
5827 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5828 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5829 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5830
5831 let heredoc = lexer.read_heredoc("EOF", false);
5832 assert_eq!(heredoc.content, "hello\n");
5833
5834 let redirect = lexer.next_lexed_token_with_comments().unwrap();
5835 assert_eq!(redirect.kind, TokenKind::RedirectOut);
5836 assert_eq!(redirect.span.slice(source), ">");
5837
5838 let target = lexer.next_lexed_token_with_comments().unwrap();
5839 assert_eq!(target.kind, TokenKind::Word);
5840 assert_eq!(
5841 token_text(&target, lexer.input).as_deref(),
5842 Some("file.txt")
5843 );
5844 assert_eq!(target.span.slice(source), "file.txt");
5845
5846 let newline = lexer.next_lexed_token_with_comments().unwrap();
5847 assert_eq!(newline.kind, TokenKind::Newline);
5848 assert_eq!(newline.span.slice(source), "\n");
5849
5850 let comment = lexer.next_lexed_token_with_comments().unwrap();
5851 assert_eq!(comment.kind, TokenKind::Comment);
5852 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
5853 assert_eq!(comment.span.slice(source), "# done");
5854 }
5855
5856 #[test]
5857 fn test_comment_with_unicode() {
5858 let source = "# café résumé\necho ok";
5860 let mut lexer = Lexer::new(source);
5861
5862 let comment = lexer.next_lexed_token_with_comments().unwrap();
5863 assert_eq!(comment.kind, TokenKind::Comment);
5864 assert_eq!(
5865 token_text(&comment, lexer.input).as_deref(),
5866 Some(" café résumé")
5867 );
5868 let start = comment.span.start.offset;
5870 let end = comment.span.end.offset;
5871 assert_eq!(start, 0);
5872 assert_eq!(&source[start..end], "# café résumé");
5873 assert!(source.is_char_boundary(start));
5874 assert!(source.is_char_boundary(end));
5875
5876 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
5877 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5878 }
5879
5880 #[test]
5881 fn test_comment_with_cjk_characters() {
5882 let source = "# 你好世界\necho ok";
5884 let mut lexer = Lexer::new(source);
5885
5886 let comment = lexer.next_lexed_token_with_comments().unwrap();
5887 assert_eq!(comment.kind, TokenKind::Comment);
5888 assert_eq!(
5889 token_text(&comment, lexer.input).as_deref(),
5890 Some(" 你好世界")
5891 );
5892 let start = comment.span.start.offset;
5893 let end = comment.span.end.offset;
5894 assert_eq!(&source[start..end], "# 你好世界");
5895 assert!(source.is_char_boundary(start));
5896 assert!(source.is_char_boundary(end));
5897 }
5898
5899 #[test]
5900 fn test_heredoc_with_comments_inside() {
5901 let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
5903 let mut lexer = Lexer::new(source);
5904
5905 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
5906 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
5907 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
5908
5909 let heredoc = lexer.read_heredoc("EOF", false);
5910 assert_eq!(heredoc.content, "# not a comment\nreal line\n");
5911
5912 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
5915 let comment = lexer.next_lexed_token_with_comments().unwrap();
5916 assert_eq!(comment.kind, TokenKind::Comment);
5917 assert_eq!(
5918 token_text(&comment, lexer.input).as_deref(),
5919 Some(" real comment")
5920 );
5921 }
5922
5923 #[test]
5924 fn test_heredoc_with_hash_in_variable() {
5925 let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
5927 let mut lexer = Lexer::new(source);
5928
5929 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
5930 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
5931 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
5932
5933 let heredoc = lexer.read_heredoc("EOF", false);
5934 assert_eq!(heredoc.content, "val=${x#prefix}\n");
5935 }
5936
5937 #[test]
5938 fn test_heredoc_span_does_not_leak() {
5939 let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
5942 let mut lexer = Lexer::new(source);
5943
5944 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5945 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5946 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5947
5948 let heredoc = lexer.read_heredoc("EOF", false);
5949 let start = heredoc.content_span.start.offset;
5950 let end = heredoc.content_span.end.offset;
5951 assert!(
5952 end <= source.len(),
5953 "heredoc span end ({end}) exceeds source length ({})",
5954 source.len()
5955 );
5956 assert_eq!(&source[start..end], "hello\nworld\n");
5957
5958 assert_next_token(&mut lexer, TokenKind::Newline, None);
5960 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5961 assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
5962 }
5963
    // Exercises a configure-script-shaped input: a backslash-quoted heredoc
    // delimiter (`\_ACEOF`) whose body contains an unbalanced backtick,
    // followed by command lines that themselves contain backtick command
    // substitutions. The body's stray backtick must not leak quoting state
    // into the words lexed after the heredoc.
    #[test]
    fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
        let source = "\
cat <<\\_ACEOF
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
_ACEOF
ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
";
        let mut lexer = Lexer::new(source);

        assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
        assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
        // The quoted delimiter lexes as a single word including the backslash.
        let delimiter = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(delimiter.kind, TokenKind::Word);
        assert_eq!(delimiter.span.slice(source), "\\_ACEOF");

        // Body is taken verbatim up to (not including) the `_ACEOF` line,
        // backtick and all.
        let heredoc = lexer.read_heredoc("_ACEOF", false);
        assert_eq!(
            heredoc.content,
            "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // First post-heredoc line: the whole assignment (including the
        // backtick substitution) is one word whose span matches the source.
        let first = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(first.kind, TokenKind::Word);
        assert_eq!(
            first.span.slice(source),
            "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
        );
        // Collect (kind, text, span-text) per segment so the expectation can
        // check both the cooked text and the source-backed span together.
        let first_segments = first
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            first_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_dir_suffix=/".to_string(),
                    Some("ac_dir_suffix=/".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
                    Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
                ),
            ]
        );

        assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);

        // Second post-heredoc line: same shape, different substitution.
        let second = lexer.next_lexed_token_with_comments().unwrap();
        assert_eq!(second.kind, TokenKind::Word);
        assert_eq!(
            second.span.slice(source),
            "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
        );
        let second_segments = second
            .word()
            .unwrap()
            .segments()
            .map(|segment| {
                (
                    segment.kind(),
                    segment.as_str().to_string(),
                    segment.span().map(|span| span.slice(source).to_string()),
                )
            })
            .collect::<Vec<_>>();
        assert_eq!(
            second_segments,
            vec![
                (
                    LexedWordSegmentKind::Plain,
                    "ac_top_builddir_sub=".to_string(),
                    Some("ac_top_builddir_sub=".to_string()),
                ),
                (
                    LexedWordSegmentKind::Plain,
                    "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
                    Some(
                        "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
                            .to_string(),
                    ),
                ),
            ]
        );
    }
6063
6064 #[test]
6065 fn test_heredoc_with_unicode_content() {
6066 let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6068 let mut lexer = Lexer::new(source);
6069
6070 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6071 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6072 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6073
6074 let heredoc = lexer.read_heredoc("EOF", false);
6075 assert_eq!(heredoc.content, "# 你好\ncafé\n");
6076 let start = heredoc.content_span.start.offset;
6077 let end = heredoc.content_span.end.offset;
6078 assert!(
6079 source.is_char_boundary(start),
6080 "heredoc span start ({start}) not on char boundary"
6081 );
6082 assert!(
6083 source.is_char_boundary(end),
6084 "heredoc span end ({end}) not on char boundary"
6085 );
6086 assert_eq!(&source[start..end], "# 你好\ncafé\n");
6087 }
6088
6089 #[test]
6090 fn test_assoc_compound_assignment() {
6091 let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6094 assert_next_token(
6095 &mut lexer,
6096 TokenKind::Word,
6097 Some(r#"m=([foo]="bar" [baz]="qux")"#),
6098 );
6099 assert!(lexer.next_lexed_token().is_none());
6100 }
6101
6102 #[test]
6103 fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6104 let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6105 let mut lexer = Lexer::new(source);
6106
6107 let token = lexer.next_lexed_token().unwrap();
6108 assert_eq!(token.kind, TokenKind::Word);
6109 assert_eq!(token.span.slice(source), source);
6110 assert!(lexer.next_lexed_token().is_none());
6111 }
6112
6113 #[test]
6114 fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6115 let source = r#"foo\_bar@(baz|qux)"#;
6116 let mut lexer = Lexer::new(source);
6117
6118 let token = lexer.next_lexed_token().unwrap();
6119 assert_eq!(token.kind, TokenKind::Word);
6120 assert_eq!(token.span.slice(source), source);
6121 assert!(lexer.next_lexed_token().is_none());
6122 }
6123
6124 #[test]
6125 fn test_indexed_array_not_collapsed() {
6126 let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6129 assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6130 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6131 }
6132
6133 #[test]
6134 fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6135 let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6136 let mut lexer = Lexer::new(source);
6137
6138 assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6139 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6140
6141 let token = lexer.next_lexed_token().unwrap();
6142 assert_eq!(token.kind, TokenKind::Word);
6143 assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6144
6145 let word = token.word().unwrap();
6146 let segments: Vec<_> = word
6147 .segments()
6148 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6149 .collect();
6150 assert_eq!(
6151 segments,
6152 vec![
6153 (
6154 LexedWordSegmentKind::DoubleQuoted,
6155 "$plugin_dir".to_string()
6156 ),
6157 (LexedWordSegmentKind::Plain, "/*".to_string()),
6158 (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6159 ]
6160 );
6161
6162 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6163 assert!(lexer.next_lexed_token().is_none());
6164 }
6165
6166 #[test]
6167 fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6168 let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6169 let mut lexer = Lexer::new(source);
6170
6171 assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6172 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6173
6174 let token = lexer.next_lexed_token().unwrap();
6175 assert_eq!(token.kind, TokenKind::Word);
6176 assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6177
6178 let word = token.word().unwrap();
6179 let segments: Vec<_> = word
6180 .segments()
6181 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6182 .collect();
6183 assert_eq!(
6184 segments,
6185 vec![
6186 (
6187 LexedWordSegmentKind::DoubleQuoted,
6188 "$__GREP_CACHE_FILE".to_string()
6189 ),
6190 (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6191 ]
6192 );
6193
6194 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6195 assert!(lexer.next_lexed_token().is_none());
6196 }
6197
6198 #[test]
6199 fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6200 let source = r#"$dir/${~pats}(N)"#;
6201 let mut lexer = Lexer::new(source);
6202
6203 let token = lexer.next_lexed_token().unwrap();
6204 assert_eq!(token.kind, TokenKind::Word);
6205 assert_eq!(token.span.slice(source), source);
6206 assert!(lexer.next_lexed_token().is_none());
6207 }
6208
6209 #[test]
6210 fn test_dollar_word_does_not_absorb_function_parens() {
6211 let mut lexer = Lexer::new(r#"foo$x()"#);
6212
6213 assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6214 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6215 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6216 assert!(lexer.next_lexed_token().is_none());
6217 }
6218
6219 #[test]
6220 fn test_command_substitution_word_does_not_absorb_function_parens() {
6221 let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6222
6223 assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6224 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6225 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6226 assert!(lexer.next_lexed_token().is_none());
6227 }
6228
6229 #[test]
6232 fn test_digit_at_eof_no_panic() {
6233 let mut lexer = Lexer::new("2");
6235 let token = lexer.next_lexed_token();
6236 assert!(token.is_some());
6237 }
6238
6239 #[test]
6241 fn test_nested_brace_expansion_single_token() {
6242 let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6244 assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6245 assert!(lexer.next_lexed_token().is_none());
6247 }
6248
6249 #[test]
6251 fn test_simple_brace_expansion_unchanged() {
6252 let mut lexer = Lexer::new("${foo}");
6253 assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6254 assert!(lexer.next_lexed_token().is_none());
6255 }
6256
6257 #[test]
6258 fn test_nvm_fixture_lexes_without_stalling() {
6259 let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6260 let mut lexer = Lexer::new(input);
6261 let mut tokens = 0usize;
6262
6263 while lexer.next_lexed_token().is_some() {
6264 tokens += 1;
6265 assert!(
6266 tokens < 100_000,
6267 "lexer should continue making progress on the nvm fixture"
6268 );
6269 }
6270
6271 assert!(tokens > 0, "nvm fixture should produce at least one token");
6272 }
6273
6274 #[test]
6275 fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6276 let input = concat!(
6277 "case \"${_input_type:-}\" in\n",
6278 " html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6279 " org) _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6280 "esac\n",
6281 );
6282
6283 assert_non_newline_tokens_stay_on_one_line(input);
6284
6285 let mut lexer = Lexer::new(input);
6286 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6287 .map(|token| (token.kind, token_text(&token, input)))
6288 .collect::<Vec<_>>();
6289 assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6290 assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6291 }
6292
6293 #[test]
6294 fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6295 let input = concat!(
6296 "case $2 in\n",
6297 " cygwin*) bin='cygwin32/bin' ;|\n",
6298 "esac\n",
6299 );
6300
6301 let mut lexer = Lexer::new(input);
6302 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6303 .map(|token| (token.kind, token_text(&token, input)))
6304 .collect::<Vec<_>>();
6305
6306 assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6307 assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6308 assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6309 }
6310
6311 #[test]
6312 fn test_inline_if_with_array_append_stays_line_local() {
6313 let input = concat!(
6314 "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6315 "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6316 );
6317
6318 assert_non_newline_tokens_stay_on_one_line(input);
6319 }
6320
6321 #[test]
6322 fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6323 let source = "unsetopt interactive_comments\n#literal\n";
6324 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6325 let mut lexer = Lexer::with_profile(source, &profile);
6326
6327 assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6328 assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6329 assert_next_token(&mut lexer, TokenKind::Newline, None);
6330 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6331 }
6332
6333 #[test]
6334 fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6335 let source = "setopt rc_quotes\nprint 'a''b'\n";
6336 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6337 let mut lexer = Lexer::with_profile(source, &profile);
6338
6339 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6340 assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6341 assert_next_token(&mut lexer, TokenKind::Newline, None);
6342 assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6343 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6344 }
6345
6346 #[test]
6347 fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6348 let source = "setopt ignore_braces\n{ echo }\n";
6349 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6350 let mut lexer = Lexer::with_profile(source, &profile);
6351
6352 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6353 assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6354 assert_next_token(&mut lexer, TokenKind::Newline, None);
6355 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6356 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6357 assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6358 }
6359
    // Regression test from a fuzzer-found crash: a heredoc-like `<<E` inside
    // nested arithmetic `$(( … ))` input. The exact byte sequence is what
    // triggered the original panic, so it must be preserved verbatim; the
    // only requirement here is that parsing completes without panicking.
    #[test]
    fn test_heredoc_in_arithmetic_fuzz_crash() {
        let data: &[u8] = &[
            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
        ];
        // The fuzzer input happens to be valid UTF-8 (it contains control
        // bytes but no invalid sequences), so the unwrap is expected to hold.
        let input = std::str::from_utf8(data).unwrap();
        let script = format!("echo $(({input}))\n");
        // Only completion matters; the parse result itself is discarded.
        let _ = crate::parser::Parser::new(&script).parse();
    }
6391}