1use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Small bit set attached to every lexed token.
///
/// Two independent bits are tracked: whether the token carries "cooked"
/// text, and whether the token was marked synthetic.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit reported by [`TokenFlags::has_cooked_text`].
    const COOKED_TEXT: u8 = 1 << 0;
    /// Bit reported by [`TokenFlags::is_synthetic`].
    const SYNTHETIC: u8 = 1 << 1;

    /// Returns a copy of `self` with `bit` additionally set.
    const fn with_bit(self, bit: u8) -> Self {
        Self(self.0 | bit)
    }

    /// Returns `true` when `bit` is set in `self`.
    const fn has_bit(self, bit: u8) -> bool {
        (self.0 & bit) != 0
    }

    /// Flag set with no bits set.
    const fn empty() -> Self {
        Self(0)
    }

    /// Flag set with only the cooked-text bit set.
    const fn cooked_text() -> Self {
        Self::empty().with_bit(Self::COOKED_TEXT)
    }

    /// Returns these flags with the synthetic bit added; other bits are kept.
    pub(crate) const fn with_synthetic(self) -> Self {
        self.with_bit(Self::SYNTHETIC)
    }

    /// Whether the cooked-text bit is set.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.has_bit(Self::COOKED_TEXT)
    }

    /// Whether the synthetic bit is set.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.has_bit(Self::SYNTHETIC)
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43 Borrowed(&'a str),
44 Shared {
45 source: Arc<str>,
46 range: Range<usize>,
47 },
48 Owned(String),
49}
50
51impl TokenText<'_> {
52 pub(crate) fn as_str(&self) -> &str {
53 match self {
54 Self::Borrowed(text) => text,
55 Self::Shared { source, range } => &source[range.clone()],
56 Self::Owned(text) => text,
57 }
58 }
59
60 fn into_owned<'a>(self) -> TokenText<'a> {
61 match self {
62 Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63 Self::Shared { source, range } => TokenText::Shared { source, range },
64 Self::Owned(text) => TokenText::Owned(text),
65 }
66 }
67
68 fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69 match self {
70 Self::Borrowed(text) => span
71 .filter(|span| span.end.offset <= source.len())
72 .map_or_else(
73 || TokenText::Owned(text.to_string()),
74 |span| TokenText::Shared {
75 source: Arc::clone(source),
76 range: span.start.offset..span.end.offset,
77 },
78 ),
79 Self::Shared { source, range } => TokenText::Shared { source, range },
80 Self::Owned(text) => TokenText::Owned(text),
81 }
82 }
83}
84
/// Classification of a single [`LexedWordSegment`].
///
/// `LexedToken::word_segment_kind` maps `TokenKind::Word` to `Plain`,
/// `TokenKind::LiteralWord` to `SingleQuoted`, `TokenKind::QuotedWord` to
/// `DoubleQuoted`, and every other token kind to `Composite`.
// NOTE(review): the `Dollar*` variants presumably correspond to `$'...'` and
// `$"..."` quoting; the code that produces them is not shown here — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexedWordSegmentKind {
    /// Segment kind used for `TokenKind::Word` tokens.
    Plain,
    /// Segment kind used for `TokenKind::LiteralWord` tokens.
    SingleQuoted,
    /// Dollar-prefixed single-quoted segment (see the review note above).
    DollarSingleQuoted,
    /// Segment kind used for `TokenKind::QuotedWord` tokens.
    DoubleQuoted,
    /// Dollar-prefixed double-quoted segment (see the review note above).
    DollarDoubleQuoted,
    /// Fallback kind used for all remaining token kinds.
    Composite,
}
101
102#[derive(Debug, Clone, PartialEq, Eq)]
104pub(crate) struct LexedWordSegment<'a> {
105 kind: LexedWordSegmentKind,
106 text: TokenText<'a>,
107 span: Option<Span>,
108 wrapper_span: Option<Span>,
109}
110
111impl<'a> LexedWordSegment<'a> {
112 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113 Self {
114 kind,
115 text: TokenText::Borrowed(text),
116 span,
117 wrapper_span: span,
118 }
119 }
120
121 fn borrowed_with_spans(
122 kind: LexedWordSegmentKind,
123 text: &'a str,
124 span: Option<Span>,
125 wrapper_span: Option<Span>,
126 ) -> Self {
127 Self {
128 kind,
129 text: TokenText::Borrowed(text),
130 span,
131 wrapper_span,
132 }
133 }
134
135 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136 Self {
137 kind,
138 text: TokenText::Owned(text),
139 span: None,
140 wrapper_span: None,
141 }
142 }
143
144 fn owned_with_spans(
145 kind: LexedWordSegmentKind,
146 text: String,
147 span: Option<Span>,
148 wrapper_span: Option<Span>,
149 ) -> Self {
150 Self {
151 kind,
152 text: TokenText::Owned(text),
153 span,
154 wrapper_span,
155 }
156 }
157
158 pub(crate) fn as_str(&self) -> &str {
160 self.text.as_str()
161 }
162
163 pub(crate) const fn text_is_source_backed(&self) -> bool {
164 matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165 }
166
167 pub(crate) const fn kind(&self) -> LexedWordSegmentKind {
169 self.kind
170 }
171
172 pub(crate) const fn span(&self) -> Option<Span> {
174 self.span
175 }
176
177 pub(crate) fn wrapper_span(&self) -> Option<Span> {
179 self.wrapper_span.or(self.span)
180 }
181
182 fn rebased(mut self, base: Position) -> Self {
183 self.span = self.span.map(|span| span.rebased(base));
184 self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185 self
186 }
187
188 fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189 LexedWordSegment {
190 kind: self.kind,
191 text: self.text.into_owned(),
192 span: self.span,
193 wrapper_span: self.wrapper_span,
194 }
195 }
196
197 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198 LexedWordSegment {
199 kind: self.kind,
200 text: self.text.into_shared(source, self.span),
201 span: self.span,
202 wrapper_span: self.wrapper_span,
203 }
204 }
205}
206
207#[derive(Debug, Clone, PartialEq, Eq)]
209pub(crate) struct LexedWord<'a> {
210 primary_segment: LexedWordSegment<'a>,
211 trailing_segments: Vec<LexedWordSegment<'a>>,
212}
213
214impl<'a> LexedWord<'a> {
215 fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216 Self {
217 primary_segment,
218 trailing_segments: Vec::new(),
219 }
220 }
221
222 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223 Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224 }
225
226 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227 Self::from_segment(LexedWordSegment::owned(kind, text))
228 }
229
230 fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231 self.trailing_segments.push(segment);
232 }
233
234 pub(crate) fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236 std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237 }
238
239 pub(crate) fn text(&self) -> Option<&str> {
241 self.single_segment().map(LexedWordSegment::as_str)
242 }
243
244 pub(crate) fn joined_text(&self) -> String {
246 let mut text = String::new();
247 for segment in self.segments() {
248 text.push_str(segment.as_str());
249 }
250 text
251 }
252
253 pub(crate) fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255 self.trailing_segments
256 .is_empty()
257 .then_some(&self.primary_segment)
258 }
259
260 fn has_cooked_text(&self) -> bool {
261 self.segments()
262 .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263 }
264
265 fn rebased(mut self, base: Position) -> Self {
266 self.primary_segment = self.primary_segment.rebased(base);
267 self.trailing_segments = self
268 .trailing_segments
269 .into_iter()
270 .map(|segment| segment.rebased(base))
271 .collect();
272 self
273 }
274
275 fn into_owned<'b>(self) -> LexedWord<'b> {
276 LexedWord {
277 primary_segment: self.primary_segment.into_owned(),
278 trailing_segments: self
279 .trailing_segments
280 .into_iter()
281 .map(LexedWordSegment::into_owned)
282 .collect(),
283 }
284 }
285
286 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287 LexedWord {
288 primary_segment: self.primary_segment.into_shared(source),
289 trailing_segments: self
290 .trailing_segments
291 .into_iter()
292 .map(|segment| segment.into_shared(source))
293 .collect(),
294 }
295 }
296}
297
/// The kinds of unterminated construct the lexer reports as an error token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexerErrorKind {
    /// Reported with the message "unterminated command substitution".
    CommandSubstitution,
    /// Reported with the message "unterminated backtick substitution".
    BacktickSubstitution,
    /// Reported with the message "unterminated single quote".
    SingleQuote,
    /// Reported with the message "unterminated double quote".
    DoubleQuote,
}

impl LexerErrorKind {
    /// Static, human-readable description of this error kind.
    pub(crate) const fn message(self) -> &'static str {
        const COMMAND_SUBSTITUTION: &str = "unterminated command substitution";
        const BACKTICK_SUBSTITUTION: &str = "unterminated backtick substitution";
        const SINGLE_QUOTE: &str = "unterminated single quote";
        const DOUBLE_QUOTE: &str = "unterminated double quote";

        match self {
            LexerErrorKind::CommandSubstitution => COMMAND_SUBSTITUTION,
            LexerErrorKind::BacktickSubstitution => BACKTICK_SUBSTITUTION,
            LexerErrorKind::SingleQuote => SINGLE_QUOTE,
            LexerErrorKind::DoubleQuote => DOUBLE_QUOTE,
        }
    }
}
322
/// Extra data attached to a [`LexedToken`] beyond its kind and span.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// No extra data (used by `LexedToken::punctuation` and `LexedToken::comment`).
    None,
    /// The segments of a word token.
    Word(LexedWord<'a>),
    /// A single file-descriptor number, read back through `LexedToken::fd_value`.
    Fd(i32),
    /// A `(src_fd, dst_fd)` pair, read back through `LexedToken::fd_pair_value`.
    FdPair(i32, i32),
    /// The kind of lexer error carried by a `TokenKind::Error` token.
    Error(LexerErrorKind),
}
331
332#[derive(Debug, Clone, PartialEq, Eq)]
338pub struct LexedToken<'a> {
339 pub kind: TokenKind,
341 pub span: Span,
343 pub(crate) flags: TokenFlags,
344 payload: TokenPayload<'a>,
345}
346
347impl<'a> LexedToken<'a> {
348 fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
349 match kind {
350 TokenKind::Word => LexedWordSegmentKind::Plain,
351 TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
352 TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
353 _ => LexedWordSegmentKind::Composite,
354 }
355 }
356
357 pub(crate) fn punctuation(kind: TokenKind) -> Self {
358 Self {
359 kind,
360 span: Span::new(),
361 flags: TokenFlags::empty(),
362 payload: TokenPayload::None,
363 }
364 }
365
366 fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
367 let flags = if word.has_cooked_text() {
368 TokenFlags::cooked_text()
369 } else {
370 TokenFlags::empty()
371 };
372
373 Self {
374 kind,
375 span: Span::new(),
376 flags,
377 payload: TokenPayload::Word(word),
378 }
379 }
380
381 fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
382 Self::with_word_payload(
383 kind,
384 LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
385 )
386 }
387
388 fn owned_word(kind: TokenKind, text: String) -> Self {
389 Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
390 }
391
392 fn comment() -> Self {
393 Self {
394 kind: TokenKind::Comment,
395 span: Span::new(),
396 flags: TokenFlags::empty(),
397 payload: TokenPayload::None,
398 }
399 }
400
401 fn fd(kind: TokenKind, fd: i32) -> Self {
402 Self {
403 kind,
404 span: Span::new(),
405 flags: TokenFlags::empty(),
406 payload: TokenPayload::Fd(fd),
407 }
408 }
409
410 fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
411 Self {
412 kind,
413 span: Span::new(),
414 flags: TokenFlags::empty(),
415 payload: TokenPayload::FdPair(src_fd, dst_fd),
416 }
417 }
418
419 fn error(kind: LexerErrorKind) -> Self {
420 Self {
421 kind: TokenKind::Error,
422 span: Span::new(),
423 flags: TokenFlags::empty(),
424 payload: TokenPayload::Error(kind),
425 }
426 }
427
428 pub(crate) fn with_span(mut self, span: Span) -> Self {
429 self.span = span;
430 self
431 }
432
433 pub(crate) fn rebased(mut self, base: Position) -> Self {
434 self.span = self.span.rebased(base);
435 self.payload = match self.payload {
436 TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
437 payload => payload,
438 };
439 self
440 }
441
442 pub(crate) fn with_synthetic_flag(mut self) -> Self {
443 self.flags = self.flags.with_synthetic();
444 self
445 }
446
447 pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
448 let payload = match self.payload {
449 TokenPayload::None => TokenPayload::None,
450 TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
451 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
452 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
453 TokenPayload::Error(kind) => TokenPayload::Error(kind),
454 };
455
456 LexedToken {
457 kind: self.kind,
458 span: self.span,
459 flags: self.flags,
460 payload,
461 }
462 }
463
464 pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
465 let payload = match self.payload {
466 TokenPayload::None => TokenPayload::None,
467 TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
468 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
469 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
470 TokenPayload::Error(kind) => TokenPayload::Error(kind),
471 };
472
473 LexedToken {
474 kind: self.kind,
475 span: self.span,
476 flags: self.flags,
477 payload,
478 }
479 }
480
481 pub(crate) fn word_text(&self) -> Option<&str> {
483 self.kind
484 .is_word_like()
485 .then_some(())
486 .and_then(|_| match &self.payload {
487 TokenPayload::Word(word) => word.text(),
488 _ => None,
489 })
490 }
491
492 pub(crate) fn word_string(&self) -> Option<String> {
494 self.kind
495 .is_word_like()
496 .then_some(())
497 .and_then(|_| match &self.payload {
498 TokenPayload::Word(word) => Some(word.joined_text()),
499 _ => None,
500 })
501 }
502
503 pub(crate) fn word(&self) -> Option<&LexedWord<'a>> {
505 match &self.payload {
506 TokenPayload::Word(word) => Some(word),
507 _ => None,
508 }
509 }
510
511 pub(crate) fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
513 if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
514 return None;
515 }
516
517 (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
518 .then(|| &source[self.span.start.offset..self.span.end.offset])
519 }
520
521 pub(crate) fn fd_value(&self) -> Option<i32> {
523 match self.payload {
524 TokenPayload::Fd(fd) => Some(fd),
525 _ => None,
526 }
527 }
528
529 pub(crate) fn fd_pair_value(&self) -> Option<(i32, i32)> {
531 match self.payload {
532 TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
533 _ => None,
534 }
535 }
536
537 pub(crate) fn error_kind(&self) -> Option<LexerErrorKind> {
539 match self.payload {
540 TokenPayload::Error(kind) => Some(kind),
541 _ => None,
542 }
543 }
544}
545
/// Text content paired with the span it was read from.
// NOTE(review): the name suggests this is the result of reading a
// here-document body; the producing code is not shown here — confirm.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct HeredocRead {
    /// The content that was read.
    pub content: String,
    /// Span associated with `content`.
    pub content_span: Span,
}
554
/// Substitution depth limit that `Lexer::new` passes as `max_depth` when the
/// caller does not supply one.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
// NOTE(review): presumably bounds how deeply nested parameter expansions are
// scanned; its use is not shown here — confirm.
const MAX_PARAMETER_EXPANSION_SCAN_DEPTH: usize = 4;
559
560#[derive(Clone, Debug)]
561struct Cursor<'a> {
562 rest: &'a str,
563}
564
565impl<'a> Cursor<'a> {
566 fn new(source: &'a str) -> Self {
567 Self { rest: source }
568 }
569
570 fn first(&self) -> Option<char> {
571 self.rest.chars().next()
572 }
573
574 fn second(&self) -> Option<char> {
575 let mut chars = self.rest.chars();
576 chars.next()?;
577 chars.next()
578 }
579
580 fn third(&self) -> Option<char> {
581 let mut chars = self.rest.chars();
582 chars.next()?;
583 chars.next()?;
584 chars.next()
585 }
586
587 fn bump(&mut self) -> Option<char> {
588 let ch = self.first()?;
589 self.rest = &self.rest[ch.len_utf8()..];
590 Some(ch)
591 }
592
593 fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
594 let start = self.rest;
595 let mut end = 0;
596
597 for ch in start.chars() {
598 if !predicate(ch) {
599 break;
600 }
601 end += ch.len_utf8();
602 }
603
604 self.rest = &start[end..];
605 &start[..end]
606 }
607
608 fn rest(&self) -> &'a str {
609 self.rest
610 }
611
612 fn skip_bytes(&mut self, count: usize) {
613 self.rest = &self.rest[count..];
614 }
615
616 fn find_byte(&self, byte: u8) -> Option<usize> {
617 memchr(byte, self.rest.as_bytes())
618 }
619}
620
/// Translates byte offsets in `source` into line/column [`Position`]s.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    /// The text that offsets refer to.
    source: &'a str,
    /// Byte offset at which each line starts; the first entry is always `0`.
    line_starts: Arc<[usize]>,
    /// The most recently computed position, reused by `position` to advance
    /// incrementally when the next query is further along.
    cached: Position,
}
627
/// Counters collected by the lexer when the `benchmarking` feature is enabled.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of `Lexer::current_position` calls recorded while counters were enabled.
    pub(crate) current_position_calls: u64,
}
633
634impl<'a> PositionMap<'a> {
635 fn new(source: &'a str) -> Self {
636 let mut line_starts =
637 Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
638 line_starts.push(0);
639 line_starts.extend(
640 source
641 .bytes()
642 .enumerate()
643 .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
644 );
645
646 Self {
647 source,
648 line_starts: line_starts.into(),
649 cached: Position::new(),
650 }
651 }
652
653 fn position(&mut self, offset: usize) -> Position {
654 if offset == self.cached.offset {
655 return self.cached;
656 }
657
658 let position = if offset > self.cached.offset && offset <= self.source.len() {
659 Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
660 } else {
661 self.position_uncached(offset)
662 };
663 self.cached = position;
664 position
665 }
666
667 fn position_uncached(&self, offset: usize) -> Position {
668 let offset = offset.min(self.source.len());
669 let line_index = self
670 .line_starts
671 .partition_point(|start| *start <= offset)
672 .saturating_sub(1);
673 let line_start = self.line_starts[line_index];
674 let line_text = &self.source[line_start..offset];
675 let column = if line_text.is_ascii() {
676 line_text.len() + 1
677 } else {
678 line_text.chars().count() + 1
679 };
680
681 Position {
682 line: line_index + 1,
683 column,
684 offset,
685 }
686 }
687
688 fn advance_from(mut position: Position, text: &str) -> Position {
689 position.offset += text.len();
690 let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
691 if newline_count == 0 {
692 position.column += if text.is_ascii() {
693 text.len()
694 } else {
695 text.chars().count()
696 };
697 return position;
698 }
699
700 position.line += newline_count;
701 let tail_start = memrchr(b'\n', text.as_bytes())
702 .map(|index| index + 1)
703 .unwrap_or_default();
704 let tail = &text[tail_start..];
705 position.column = if tail.is_ascii() {
706 tail.len() + 1
707 } else {
708 tail.chars().count() + 1
709 };
710 position
711 }
712}
713
/// Lexer over a source string that yields [`LexedToken`]s on demand.
#[derive(Clone)]
pub struct Lexer<'a> {
    /// The complete input being lexed.
    input: &'a str,
    /// Current byte offset; advanced as characters or source bytes are consumed.
    offset: usize,
    /// Cursor over the part of `input` that has not been consumed yet.
    cursor: Cursor<'a>,
    /// Offset-to-position translator for `input`.
    position_map: PositionMap<'a>,
    /// Characters served before anything from `cursor`; `peek_char` and
    /// `advance` read from the front of this buffer first.
    reinject_buf: VecDeque<char>,
    /// Offset restored into `offset` by `sync_offset_to_cursor` once
    /// `reinject_buf` has been drained.
    reinject_resume_offset: Option<usize>,
    /// Substitution depth limit supplied at construction.
    // NOTE(review): where this limit is enforced is not shown here.
    max_subst_depth: usize,
    /// Zsh options taken from the shell profile at construction, if any.
    initial_zsh_options: Option<ZshOptionState>,
    /// Optional timeline of zsh option states keyed by source offset.
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    /// Number of timeline entries whose offset is at or before `offset`, as of
    /// the last `current_zsh_options` call.
    zsh_timeline_index: usize,
    /// Benchmark counters; `None` until `enable_benchmark_counters` is called.
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
739
740impl<'a> Lexer<'a> {
741 pub fn new(input: &'a str) -> Self {
743 Self::with_max_subst_depth_and_profile(
744 input,
745 DEFAULT_MAX_SUBST_DEPTH,
746 &ShellProfile::native(super::ShellDialect::Bash),
747 None,
748 )
749 }
750
751 pub(super) fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
754 Self::with_max_subst_depth_and_profile(
755 input,
756 max_depth,
757 &ShellProfile::native(super::ShellDialect::Bash),
758 None,
759 )
760 }
761
762 #[cfg(test)]
764 fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
765 let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
766 .then(|| ZshOptionTimeline::build(input, shell_profile))
767 .flatten()
768 .map(Arc::new);
769 Self::with_max_subst_depth_and_profile(
770 input,
771 DEFAULT_MAX_SUBST_DEPTH,
772 shell_profile,
773 zsh_timeline,
774 )
775 }
776
777 pub(crate) fn with_max_subst_depth_and_profile(
778 input: &'a str,
779 max_depth: usize,
780 shell_profile: &ShellProfile,
781 zsh_timeline: Option<Arc<ZshOptionTimeline>>,
782 ) -> Self {
783 Self {
784 input,
785 offset: 0,
786 cursor: Cursor::new(input),
787 position_map: PositionMap::new(input),
788 reinject_buf: VecDeque::new(),
789 reinject_resume_offset: None,
790 max_subst_depth: max_depth,
791 initial_zsh_options: shell_profile.zsh_options().cloned(),
792 zsh_timeline,
793 zsh_timeline_index: 0,
794 #[cfg(feature = "benchmarking")]
795 benchmark_counters: None,
796 }
797 }
798
799 pub(super) fn position_at_offset(&self, offset: usize) -> Position {
800 self.position_map.position_uncached(offset)
801 }
802
803 fn current_position(&mut self) -> Position {
804 #[cfg(feature = "benchmarking")]
805 self.maybe_record_current_position_call();
806 self.position_map.position(self.offset)
807 }
808
809 #[cfg(feature = "benchmarking")]
810 pub(crate) fn enable_benchmark_counters(&mut self) {
811 self.benchmark_counters = Some(LexerBenchmarkCounters::default());
812 }
813
814 #[cfg(feature = "benchmarking")]
815 pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
816 self.benchmark_counters.unwrap_or_default()
817 }
818
819 #[cfg(feature = "benchmarking")]
820 fn maybe_record_current_position_call(&mut self) {
821 if let Some(counters) = &mut self.benchmark_counters {
822 counters.current_position_calls += 1;
823 }
824 }
825
826 fn sync_offset_to_cursor(&mut self) {
827 if self.reinject_buf.is_empty()
828 && let Some(offset) = self.reinject_resume_offset.take()
829 {
830 self.offset = offset;
831 }
832 }
833
834 pub fn next_token_kind(&mut self) -> Option<TokenKind> {
840 self.next_lexed_token().map(|token| token.kind)
841 }
842
843 fn peek_char(&mut self) -> Option<char> {
844 self.sync_offset_to_cursor();
845 if let Some(&ch) = self.reinject_buf.front() {
846 Some(ch)
847 } else {
848 self.cursor.first()
849 }
850 }
851
852 fn advance(&mut self) -> Option<char> {
853 self.sync_offset_to_cursor();
854 let ch = if !self.reinject_buf.is_empty() {
855 self.reinject_buf.pop_front()
856 } else {
857 self.cursor.bump()
858 };
859 if let Some(c) = ch {
860 self.offset += c.len_utf8();
861 }
862 ch
863 }
864
865 fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
866 self.reinject_buf
867 .iter()
868 .copied()
869 .chain(self.cursor.rest().chars())
870 }
871
872 fn second_char(&self) -> Option<char> {
873 match self.reinject_buf.len() {
874 0 => self.cursor.second(),
875 1 => self.cursor.first(),
876 _ => self.reinject_buf.get(1).copied(),
877 }
878 }
879
880 fn third_char(&self) -> Option<char> {
881 match self.reinject_buf.len() {
882 0 => self.cursor.third(),
883 1 => self.cursor.second(),
884 2 => self.cursor.first(),
885 _ => self.reinject_buf.get(2).copied(),
886 }
887 }
888
889 fn fourth_char(&self) -> Option<char> {
890 match self.reinject_buf.len() {
891 0 => self.cursor.rest().chars().nth(3),
892 1 => self.cursor.third(),
893 2 => self.cursor.second(),
894 3 => self.cursor.first(),
895 _ => self.reinject_buf.get(3).copied(),
896 }
897 }
898
899 fn consume_source_bytes(&mut self, byte_len: usize) {
900 debug_assert!(self.reinject_buf.is_empty());
901 self.sync_offset_to_cursor();
902 self.offset += byte_len;
903 self.cursor.skip_bytes(byte_len);
904 }
905
906 fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
907 debug_assert!(self.reinject_buf.is_empty());
908 self.offset += byte_len;
909 }
910
911 fn consume_ascii_chars(&mut self, count: usize) {
912 if self.reinject_buf.is_empty() {
913 self.consume_source_bytes(count);
914 return;
915 }
916
917 for _ in 0..count {
918 self.advance();
919 }
920 }
921
922 fn source_horizontal_whitespace_len(&self) -> usize {
923 self.cursor
924 .rest()
925 .as_bytes()
926 .iter()
927 .take_while(|byte| matches!(**byte, b' ' | b'\t'))
928 .count()
929 }
930
931 fn source_ascii_plain_word_len(&self) -> usize {
932 self.cursor
933 .rest()
934 .as_bytes()
935 .iter()
936 .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
937 .count()
938 }
939
940 fn find_double_quote_special(source: &str) -> Option<usize> {
941 source
942 .as_bytes()
943 .iter()
944 .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
945 }
946
947 fn ensure_capture_from_source(
948 &self,
949 capture: &mut Option<String>,
950 start: Position,
951 end: Position,
952 ) {
953 if capture.is_none() {
954 *capture = Some(self.input[start.offset..end.offset].to_string());
955 }
956 }
957
958 fn push_capture_char(capture: &mut Option<String>, ch: char) {
959 if let Some(text) = capture.as_mut() {
960 text.push(ch);
961 }
962 }
963
964 fn push_capture_str(capture: &mut Option<String>, text: &str) {
965 if let Some(current) = capture.as_mut() {
966 current.push_str(text);
967 }
968 }
969
970 fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
971 if let Some(timeline) = self.zsh_timeline.as_ref() {
972 while self.zsh_timeline_index < timeline.entries.len()
973 && timeline.entries[self.zsh_timeline_index].offset <= self.offset
974 {
975 self.zsh_timeline_index += 1;
976 }
977 return if self.zsh_timeline_index == 0 {
978 self.initial_zsh_options.as_ref()
979 } else {
980 Some(&timeline.entries[self.zsh_timeline_index - 1].state)
981 };
982 }
983
984 self.initial_zsh_options.as_ref()
985 }
986
987 fn comments_enabled(&mut self) -> bool {
988 !self
989 .current_zsh_options()
990 .is_some_and(|options| options.interactive_comments.is_definitely_off())
991 }
992
993 fn rc_quotes_enabled(&mut self) -> bool {
994 self.current_zsh_options()
995 .is_some_and(|options| options.rc_quotes.is_definitely_on())
996 }
997
998 fn ignore_braces_enabled(&mut self) -> bool {
999 self.current_zsh_options()
1000 .is_some_and(|options| options.ignore_braces.is_definitely_on())
1001 }
1002
1003 fn ignore_close_braces_enabled(&mut self) -> bool {
1004 self.current_zsh_options().is_some_and(|options| {
1005 options.ignore_braces.is_definitely_on()
1006 || options.ignore_close_braces.is_definitely_on()
1007 })
1008 }
1009
1010 fn should_treat_hash_as_word_char(&mut self) -> bool {
1011 if !self.comments_enabled() {
1012 return true;
1013 }
1014 self.reinject_buf.is_empty()
1015 && (self
1016 .input
1017 .get(..self.offset)
1018 .and_then(|prefix| prefix.chars().next_back())
1019 .is_some_and(|prev| {
1020 !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1021 })
1022 || self.is_inside_unclosed_double_paren_on_line())
1023 }
1024
1025 fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1026 capture
1027 .as_deref()
1028 .unwrap_or(&self.input[start.offset..self.offset])
1029 }
1030
1031 fn current_word_surface_is_single_char(
1032 &self,
1033 start: Position,
1034 capture: &Option<String>,
1035 target: char,
1036 ) -> bool {
1037 let text = self.current_word_text(start, capture);
1038 if !text.contains('\x00') {
1039 let mut encoded = [0; 4];
1040 return text == target.encode_utf8(&mut encoded);
1041 }
1042
1043 let mut chars = text.chars().filter(|&ch| ch != '\x00');
1044 matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1045 }
1046
1047 fn current_word_surface_last_char<'b>(
1048 &'b self,
1049 start: Position,
1050 capture: &'b Option<String>,
1051 ) -> Option<char> {
1052 self.current_word_text(start, capture)
1053 .chars()
1054 .rev()
1055 .find(|&ch| ch != '\x00')
1056 }
1057
1058 fn current_word_surface_ends_with_char(
1059 &self,
1060 start: Position,
1061 capture: &Option<String>,
1062 target: char,
1063 ) -> bool {
1064 self.current_word_surface_last_char(start, capture) == Some(target)
1065 }
1066
1067 fn current_word_surface_ends_with_extglob_prefix(
1068 &self,
1069 start: Position,
1070 capture: &Option<String>,
1071 ) -> bool {
1072 self.current_word_surface_last_char(start, capture)
1073 .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1074 }
1075
1076 pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1082 self.skip_whitespace();
1083 let start = self.current_position();
1084 let token = self.next_lexed_token_inner(false)?;
1085 let end = self.current_position();
1086 Some(token.with_span(Span::from_positions(start, end)))
1087 }
1088
1089 pub(super) fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1091 self.skip_whitespace();
1092 let start = self.current_position();
1093 let token = self.next_lexed_token_inner(true)?;
1094 let end = self.current_position();
1095 Some(token.with_span(Span::from_positions(start, end)))
1096 }
1097
1098 fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
1100 let ch = self.peek_char()?;
1101
1102 match ch {
1103 '\n' => {
1104 self.consume_ascii_chars(1);
1105 Some(LexedToken::punctuation(TokenKind::Newline))
1106 }
1107 ';' => {
1108 if self.second_char() == Some(';') {
1109 if self.third_char() == Some('&') {
1110 self.consume_ascii_chars(3);
1111 Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) } else {
1113 self.consume_ascii_chars(2);
1114 Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) }
1116 } else if self.second_char() == Some('|') {
1117 self.consume_ascii_chars(2);
1118 Some(LexedToken::punctuation(TokenKind::SemiPipe)) } else if self.second_char() == Some('&') {
1120 self.consume_ascii_chars(2);
1121 Some(LexedToken::punctuation(TokenKind::SemiAmp)) } else {
1123 self.consume_ascii_chars(1);
1124 Some(LexedToken::punctuation(TokenKind::Semicolon))
1125 }
1126 }
1127 '|' => {
1128 if self.second_char() == Some('|') {
1129 self.consume_ascii_chars(2);
1130 Some(LexedToken::punctuation(TokenKind::Or))
1131 } else if self.second_char() == Some('&') {
1132 self.consume_ascii_chars(2);
1133 Some(LexedToken::punctuation(TokenKind::PipeBoth))
1134 } else {
1135 self.consume_ascii_chars(1);
1136 Some(LexedToken::punctuation(TokenKind::Pipe))
1137 }
1138 }
1139 '&' => {
1140 if self.second_char() == Some('&') {
1141 self.consume_ascii_chars(2);
1142 Some(LexedToken::punctuation(TokenKind::And))
1143 } else if self.second_char() == Some('>') {
1144 if self.third_char() == Some('>') {
1145 self.consume_ascii_chars(3);
1146 Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
1147 } else {
1148 self.consume_ascii_chars(2);
1149 Some(LexedToken::punctuation(TokenKind::RedirectBoth))
1150 }
1151 } else if self.second_char() == Some('|') {
1152 self.consume_ascii_chars(2);
1153 Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
1154 } else if self.second_char() == Some('!') {
1155 self.consume_ascii_chars(2);
1156 Some(LexedToken::punctuation(TokenKind::BackgroundBang))
1157 } else {
1158 self.consume_ascii_chars(1);
1159 Some(LexedToken::punctuation(TokenKind::Background))
1160 }
1161 }
1162 '>' => {
1163 if self.second_char() == Some('>') {
1164 if self.third_char() == Some('|') {
1165 self.consume_ascii_chars(3);
1166 } else {
1167 self.consume_ascii_chars(2);
1168 }
1169 Some(LexedToken::punctuation(TokenKind::RedirectAppend))
1170 } else if self.second_char() == Some('|') {
1171 self.consume_ascii_chars(2);
1172 Some(LexedToken::punctuation(TokenKind::Clobber))
1173 } else if self.second_char() == Some('(') {
1174 self.consume_ascii_chars(2);
1175 Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
1176 } else if self.second_char() == Some('&') {
1177 self.consume_ascii_chars(2);
1178 Some(LexedToken::punctuation(TokenKind::DupOutput))
1179 } else {
1180 self.consume_ascii_chars(1);
1181 Some(LexedToken::punctuation(TokenKind::RedirectOut))
1182 }
1183 }
1184 '<' => {
1185 if self.second_char() == Some('<') {
1186 if self.third_char() == Some('<') {
1187 self.consume_ascii_chars(3);
1188 Some(LexedToken::punctuation(TokenKind::HereString))
1189 } else if self.third_char() == Some('-') {
1190 self.consume_ascii_chars(3);
1191 Some(LexedToken::punctuation(TokenKind::HereDocStrip))
1192 } else {
1193 self.consume_ascii_chars(2);
1194 Some(LexedToken::punctuation(TokenKind::HereDoc))
1195 }
1196 } else if self.second_char() == Some('>') {
1197 self.consume_ascii_chars(2);
1198 Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
1199 } else if self.second_char() == Some('(') {
1200 self.consume_ascii_chars(2);
1201 Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
1202 } else if self.second_char() == Some('&') {
1203 self.consume_ascii_chars(2);
1204 Some(LexedToken::punctuation(TokenKind::DupInput))
1205 } else {
1206 self.consume_ascii_chars(1);
1207 Some(LexedToken::punctuation(TokenKind::RedirectIn))
1208 }
1209 }
1210 '(' => {
1211 if self.second_char() == Some('(') {
1212 self.consume_ascii_chars(2);
1213 Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
1214 } else {
1215 self.consume_ascii_chars(1);
1216 Some(LexedToken::punctuation(TokenKind::LeftParen))
1217 }
1218 }
1219 ')' => {
1220 if self.second_char() == Some(')') {
1221 self.consume_ascii_chars(2);
1222 Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
1223 } else {
1224 self.consume_ascii_chars(1);
1225 Some(LexedToken::punctuation(TokenKind::RightParen))
1226 }
1227 }
1228 '{' => {
1229 let start = self.current_position();
1230 if self.ignore_braces_enabled() {
1231 self.consume_ascii_chars(1);
1232 match self.peek_char() {
1233 Some(' ') | Some('\t') | Some('\n') | None => {
1234 Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
1235 }
1236 _ => self.read_word_starting_with("{", start),
1237 }
1238 } else if self.looks_like_brace_expansion() {
1239 self.read_brace_expansion_word()
1243 } else if self.is_brace_group_start() {
1244 self.advance();
1245 Some(LexedToken::punctuation(TokenKind::LeftBrace))
1246 } else if self.brace_literal_starts_case_pattern_delimiter() {
1247 self.read_word_starting_with("{", start)
1248 } else {
1249 self.read_brace_literal_word()
1250 }
1251 }
1252 '}' => {
1253 self.consume_ascii_chars(1);
1254 if self.ignore_close_braces_enabled() {
1255 Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
1256 } else {
1257 Some(LexedToken::punctuation(TokenKind::RightBrace))
1258 }
1259 }
1260 '[' => {
1261 let start = self.current_position();
1262 self.consume_ascii_chars(1);
1263 if self.peek_char() == Some('[')
1264 && matches!(
1265 self.second_char(),
1266 Some(' ') | Some('\t') | Some('\n') | None
1267 )
1268 {
1269 self.consume_ascii_chars(1);
1270 Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
1271 } else {
1272 match self.peek_char() {
1279 Some(' ') | Some('\t') | Some('\n') | None => {
1280 Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
1281 }
1282 _ => self.read_word_starting_with("[", start),
1283 }
1284 }
1285 }
1286 ']' => {
1287 if self.second_char() == Some(']') {
1288 self.consume_ascii_chars(2);
1289 Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
1290 } else {
1291 self.consume_ascii_chars(1);
1292 Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
1293 }
1294 }
1295 '\'' => self.read_single_quoted_string(),
1296 '"' => self.read_double_quoted_string(),
1297 '#' => {
1298 if self.should_treat_hash_as_word_char() {
1299 let start = self.current_position();
1300 return self.read_word_starting_with("#", start);
1301 }
1302 if preserve_comments {
1303 self.read_comment();
1304 Some(LexedToken::comment())
1305 } else {
1306 self.skip_comment();
1307 self.next_lexed_token_inner(false)
1308 }
1309 }
1310 '0'..='9' => self.read_word_or_fd_redirect(),
1312 _ => self.read_word(),
1313 }
1314 }
1315
1316 fn skip_whitespace(&mut self) {
1317 while let Some(ch) = self.peek_char() {
1318 if self.reinject_buf.is_empty() {
1319 let whitespace_len = self.source_horizontal_whitespace_len();
1320 if whitespace_len > 0 {
1321 self.consume_source_bytes(whitespace_len);
1322 continue;
1323 }
1324
1325 if self.cursor.rest().starts_with("\\\n") {
1326 self.consume_source_bytes(2);
1327 continue;
1328 }
1329 }
1330
1331 if ch == ' ' || ch == '\t' {
1332 self.consume_ascii_chars(1);
1333 } else if ch == '\\' {
1334 if self.second_char() == Some('\n') {
1336 self.consume_ascii_chars(2);
1337 } else {
1338 break;
1339 }
1340 } else {
1341 break;
1342 }
1343 }
1344 }
1345
1346 fn skip_comment(&mut self) {
1347 if self.reinject_buf.is_empty() {
1348 let end = self
1349 .cursor
1350 .find_byte(b'\n')
1351 .unwrap_or(self.cursor.rest().len());
1352 self.consume_source_bytes(end);
1353 return;
1354 }
1355
1356 while let Some(ch) = self.peek_char() {
1357 if ch == '\n' {
1358 break;
1359 }
1360 self.advance();
1361 }
1362 }
1363
1364 fn read_comment(&mut self) {
1365 debug_assert_eq!(self.peek_char(), Some('#'));
1366
1367 if self.reinject_buf.is_empty() {
1368 let rest = self.cursor.rest();
1369 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1370 self.consume_source_bytes(end);
1371 return;
1372 }
1373
1374 self.advance(); while let Some(ch) = self.peek_char() {
1377 if ch == '\n' {
1378 break;
1379 }
1380 self.advance();
1381 }
1382 }
1383
1384 fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1385 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1386 return false;
1387 }
1388
1389 let line_start = self.input[..self.offset]
1390 .rfind('\n')
1391 .map_or(0, |index| index + 1);
1392 let prefix = &self.input[line_start..self.offset];
1393 line_has_unclosed_double_paren(prefix)
1394 }
1395
1396 fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
1399 if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
1400 let Some(fd) = first_digit.to_digit(10) else {
1401 unreachable!("peeked ASCII digit should convert to a base-10 digit");
1402 };
1403 let fd = fd as i32;
1404
1405 match (self.second_char(), self.third_char()) {
1406 (Some('>'), Some('>')) => {
1407 if self.fourth_char() == Some('|') {
1408 self.consume_ascii_chars(4);
1409 } else {
1410 self.consume_ascii_chars(3);
1411 }
1412 return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
1413 }
1414 (Some('>'), Some('|')) => {
1415 self.consume_ascii_chars(3);
1416 return Some(LexedToken::fd(TokenKind::Clobber, fd));
1417 }
1418 (Some('>'), Some('&')) => {
1419 self.consume_ascii_chars(3);
1420
1421 let mut target_str = String::with_capacity(4);
1422 while let Some(c) = self.peek_char() {
1423 if c.is_ascii_digit() {
1424 target_str.push(c);
1425 self.advance();
1426 } else {
1427 break;
1428 }
1429 }
1430
1431 if target_str.is_empty() {
1432 return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1433 }
1434
1435 let target_fd: i32 = target_str.parse().unwrap_or(1);
1436 return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
1437 }
1438 (Some('>'), _) => {
1439 self.consume_ascii_chars(2);
1440 return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1441 }
1442 (Some('<'), Some('&')) => {
1443 self.consume_ascii_chars(3);
1444
1445 let mut target_str = String::with_capacity(4);
1446 while let Some(c) = self.peek_char() {
1447 if c.is_ascii_digit() || c == '-' {
1448 target_str.push(c);
1449 self.advance();
1450 if c == '-' {
1451 break;
1452 }
1453 } else {
1454 break;
1455 }
1456 }
1457
1458 if target_str == "-" {
1459 return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
1460 }
1461 let target_fd: i32 = target_str.parse().unwrap_or(0);
1462 return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
1463 }
1464 (Some('<'), Some('>')) => {
1465 self.consume_ascii_chars(3);
1466 return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
1467 }
1468 (Some('<'), Some('<')) => {}
1469 (Some('<'), _) => {
1470 self.consume_ascii_chars(2);
1471 return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
1472 }
1473 _ => {}
1474 }
1475 }
1476
1477 self.read_word()
1479 }
1480
1481 fn read_word_starting_with(
1482 &mut self,
1483 _prefix: &str,
1484 start: Position,
1485 ) -> Option<LexedToken<'a>> {
1486 let segment = match self.read_unquoted_segment(start) {
1487 Ok(segment) => segment,
1488 Err(kind) => return Some(LexedToken::error(kind)),
1489 };
1490 if segment.as_str().is_empty() {
1491 return None;
1492 }
1493 let mut lexed_word = LexedWord::from_segment(segment);
1494 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1495 return Some(LexedToken::error(kind));
1496 }
1497 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1498 }
1499
1500 fn read_word(&mut self) -> Option<LexedToken<'a>> {
1501 let start = self.current_position();
1502
1503 if self.reinject_buf.is_empty() {
1504 let ascii_len = self.source_ascii_plain_word_len();
1505 let chunk = if ascii_len > 0
1506 && self
1507 .cursor
1508 .rest()
1509 .as_bytes()
1510 .get(ascii_len)
1511 .is_none_or(|byte| byte.is_ascii())
1512 {
1513 self.consume_source_bytes(ascii_len);
1514 &self.input[start.offset..self.offset]
1515 } else {
1516 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1517 self.advance_scanned_source_bytes(chunk.len());
1518 chunk
1519 };
1520 if !chunk.is_empty() {
1521 let continues = matches!(
1522 self.peek_char(),
1523 Some(next)
1524 if Self::is_word_char(next)
1525 || next == '$'
1526 || matches!(next, '\'' | '"')
1527 || next == '{'
1528 || (next == '\\' && self.second_char() == Some('\n'))
1529 || (next == '('
1530 && (chunk.ends_with('=')
1531 || Self::word_can_take_parenthesized_suffix(chunk)))
1532 );
1533
1534 if !continues {
1535 let end = self.current_position();
1536 return Some(LexedToken::borrowed_word(
1537 TokenKind::Word,
1538 &self.input[start.offset..self.offset],
1539 Some(Span::from_positions(start, end)),
1540 ));
1541 }
1542
1543 if self.peek_char() == Some('(')
1544 && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
1545 {
1546 return self.read_complex_word(start);
1547 }
1548
1549 let end = self.current_position();
1550 return self.finish_segmented_word(LexedWord::borrowed(
1551 LexedWordSegmentKind::Plain,
1552 &self.input[start.offset..self.offset],
1553 Some(Span::from_positions(start, end)),
1554 ));
1555 }
1556 }
1557
1558 self.read_complex_word(start)
1559 }
1560
1561 fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1562 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1563 return Some(LexedToken::error(kind));
1564 }
1565
1566 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1567 }
1568
1569 fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1570 if self.peek_char() == Some('$') {
1571 match self.second_char() {
1572 Some('\'') => return self.read_dollar_single_quoted_string(),
1573 Some('"') => return self.read_dollar_double_quoted_string(),
1574 _ => {}
1575 }
1576 }
1577
1578 let segment = match self.read_unquoted_segment(start) {
1579 Ok(segment) => segment,
1580 Err(kind) => return Some(LexedToken::error(kind)),
1581 };
1582
1583 if segment.as_str().is_empty() {
1584 return None;
1585 }
1586
1587 self.finish_segmented_word(LexedWord::from_segment(segment))
1588 }
1589
1590 fn read_unquoted_segment(
1591 &mut self,
1592 start: Position,
1593 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1594 let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
1595 while let Some(ch) = self.peek_char() {
1596 if ch == '"' || ch == '\'' {
1597 break;
1598 } else if ch == '$' {
1599 if matches!(self.second_char(), Some('\'') | Some('"'))
1600 && (self.current_position().offset > start.offset
1601 || word.as_ref().is_some_and(|word| !word.is_empty()))
1602 {
1603 break;
1604 }
1605
1606 self.advance();
1608
1609 Self::push_capture_char(&mut word, ch); if self.peek_char() == Some('[') {
1613 Self::push_capture_char(&mut word, '[');
1614 self.advance();
1615 if !self.read_legacy_arithmetic_into(&mut word, start) {
1616 return Err(LexerErrorKind::CommandSubstitution);
1617 }
1618 } else if self.peek_char() == Some('(') {
1619 if self.second_char() == Some('(') {
1620 if !self.read_arithmetic_expansion_into(&mut word) {
1621 return Err(LexerErrorKind::CommandSubstitution);
1622 }
1623 } else {
1624 Self::push_capture_char(&mut word, '(');
1625 self.advance();
1626 if !self.read_command_subst_into(&mut word) {
1627 return Err(LexerErrorKind::CommandSubstitution);
1628 }
1629 }
1630 } else if self.peek_char() == Some('{') {
1631 Self::push_capture_char(&mut word, '{');
1634 self.advance();
1635 let _ = self.read_param_expansion_into(&mut word, start);
1636 } else {
1637 if let Some(c) = self.peek_char() {
1639 if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
1640 || c.is_ascii_digit()
1641 {
1642 Self::push_capture_char(&mut word, c);
1643 self.advance();
1644 } else {
1645 while let Some(c) = self.peek_char() {
1647 if c.is_ascii_alphanumeric() || c == '_' {
1648 Self::push_capture_char(&mut word, c);
1649 self.advance();
1650 } else {
1651 break;
1652 }
1653 }
1654 }
1655 }
1656 }
1657 } else if ch == '{' {
1658 if self.looks_like_mid_word_brace_segment() {
1659 Self::push_capture_char(&mut word, ch);
1662 self.advance();
1663 self.consume_mid_word_brace_segment(&mut word);
1664 } else {
1665 Self::push_capture_char(&mut word, ch);
1668 self.advance();
1669 }
1670 } else if ch == '`' {
1671 let capture_end = self.current_position();
1674 self.ensure_capture_from_source(&mut word, start, capture_end);
1675 Self::push_capture_char(&mut word, ch);
1676 self.advance(); let mut closed = false;
1678 while let Some(c) = self.peek_char() {
1679 Self::push_capture_char(&mut word, c);
1680 self.advance();
1681 if c == '`' {
1682 closed = true;
1683 break;
1684 }
1685 if c == '\\'
1686 && let Some(next) = self.peek_char()
1687 {
1688 Self::push_capture_char(&mut word, next);
1689 self.advance();
1690 }
1691 }
1692 if !closed {
1693 return Err(LexerErrorKind::BacktickSubstitution);
1694 }
1695 } else if ch == '\\' {
1696 let capture_end = self.current_position();
1697 self.ensure_capture_from_source(&mut word, start, capture_end);
1698 self.advance();
1699 if let Some(next) = self.peek_char() {
1700 if next == '\n' {
1701 self.advance();
1703 } else {
1704 Self::push_capture_char(&mut word, '\x00');
1709 Self::push_capture_char(&mut word, next);
1710 self.advance();
1711 if next == '{'
1712 && self.current_word_surface_is_single_char(start, &word, '{')
1713 && self.escaped_brace_sequence_looks_like_brace_expansion()
1714 {
1715 let mut depth = 1;
1716 while let Some(c) = self.peek_char() {
1717 Self::push_capture_char(&mut word, c);
1718 self.advance();
1719 match c {
1720 '{' => depth += 1,
1721 '}' => {
1722 depth -= 1;
1723 if depth == 0 {
1724 break;
1725 }
1726 }
1727 _ => {}
1728 }
1729 }
1730 }
1731 }
1732 } else {
1733 Self::push_capture_char(&mut word, '\\');
1734 }
1735 } else if ch == '('
1736 && self.current_word_surface_ends_with_char(start, &word, '=')
1737 && self.looks_like_assoc_assign()
1738 {
1739 Self::push_capture_char(&mut word, ch);
1742 self.advance();
1743 let mut depth = 1;
1744 while let Some(c) = self.peek_char() {
1745 Self::push_capture_char(&mut word, c);
1746 self.advance();
1747 match c {
1748 '(' => depth += 1,
1749 ')' => {
1750 depth -= 1;
1751 if depth == 0 {
1752 break;
1753 }
1754 }
1755 '"' => {
1756 while let Some(qc) = self.peek_char() {
1757 Self::push_capture_char(&mut word, qc);
1758 self.advance();
1759 if qc == '"' {
1760 break;
1761 }
1762 if qc == '\\'
1763 && let Some(esc) = self.peek_char()
1764 {
1765 Self::push_capture_char(&mut word, esc);
1766 self.advance();
1767 }
1768 }
1769 }
1770 '\'' => {
1771 while let Some(qc) = self.peek_char() {
1772 Self::push_capture_char(&mut word, qc);
1773 self.advance();
1774 if qc == '\'' {
1775 break;
1776 }
1777 }
1778 }
1779 '\\' => {
1780 if let Some(esc) = self.peek_char() {
1781 Self::push_capture_char(&mut word, esc);
1782 self.advance();
1783 }
1784 }
1785 _ => {}
1786 }
1787 }
1788 } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
1789 {
1790 Self::push_capture_char(&mut word, ch);
1793 self.advance();
1794 let mut depth = 1;
1795 while let Some(c) = self.peek_char() {
1796 Self::push_capture_char(&mut word, c);
1797 self.advance();
1798 match c {
1799 '(' => depth += 1,
1800 ')' => {
1801 depth -= 1;
1802 if depth == 0 {
1803 break;
1804 }
1805 }
1806 '\\' => {
1807 if let Some(esc) = self.peek_char() {
1808 Self::push_capture_char(&mut word, esc);
1809 self.advance();
1810 }
1811 }
1812 _ => {}
1813 }
1814 }
1815 } else if Self::is_plain_word_char(ch) {
1816 if self.reinject_buf.is_empty() {
1817 let ascii_len = self.source_ascii_plain_word_len();
1818 let chunk = if ascii_len > 0
1819 && self
1820 .cursor
1821 .rest()
1822 .as_bytes()
1823 .get(ascii_len)
1824 .is_none_or(|byte| byte.is_ascii())
1825 {
1826 self.consume_source_bytes(ascii_len);
1827 &self.input[self.offset - ascii_len..self.offset]
1828 } else {
1829 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1830 self.advance_scanned_source_bytes(chunk.len());
1831 chunk
1832 };
1833 Self::push_capture_str(&mut word, chunk);
1834 } else {
1835 Self::push_capture_char(&mut word, ch);
1836 self.advance();
1837 }
1838 } else {
1839 break;
1840 }
1841 }
1842
1843 if let Some(word) = word {
1844 let span = Some(Span::from_positions(start, self.current_position()));
1845 Ok(LexedWordSegment::owned_with_spans(
1846 LexedWordSegmentKind::Plain,
1847 word,
1848 span,
1849 span,
1850 ))
1851 } else {
1852 let end = self.current_position();
1853 Ok(LexedWordSegment::borrowed(
1854 LexedWordSegmentKind::Plain,
1855 &self.input[start.offset..self.offset],
1856 Some(Span::from_positions(start, end)),
1857 ))
1858 }
1859 }
1860
1861 fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1862 let segment = match self.read_single_quoted_segment() {
1863 Ok(segment) => segment,
1864 Err(kind) => return Some(LexedToken::error(kind)),
1865 };
1866 let mut word = LexedWord::from_segment(segment);
1867 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1868 return Some(LexedToken::error(kind));
1869 }
1870
1871 Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1872 }
1873
1874 fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1875 debug_assert_eq!(self.peek_char(), Some('\''));
1876
1877 let wrapper_start = self.current_position();
1878 self.consume_ascii_chars(1); let content_start = self.current_position();
1880 let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
1881 let mut content_end = content_start;
1882 let mut content = String::with_capacity(16);
1883 let mut closed = false;
1884
1885 if can_borrow {
1886 let rest = self.cursor.rest();
1887 if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
1888 self.consume_source_bytes(quote_index);
1889 content_end = self.current_position();
1890 self.consume_ascii_chars(1); closed = true;
1892 } else {
1893 self.consume_source_bytes(rest.len());
1894 }
1895 }
1896
1897 while let Some(ch) = self.peek_char() {
1898 if closed {
1899 break;
1900 }
1901 if ch == '\'' {
1902 if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
1903 if !can_borrow {
1904 content.push('\'');
1905 }
1906 self.advance();
1907 self.advance();
1908 continue;
1909 }
1910 content_end = self.current_position();
1911 self.consume_ascii_chars(1); closed = true;
1913 break;
1914 }
1915 if !can_borrow {
1916 content.push(ch);
1917 }
1918 self.advance();
1919 }
1920
1921 if !closed {
1922 return Err(LexerErrorKind::SingleQuote);
1923 }
1924
1925 let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
1926 let content_span = Some(Span::from_positions(content_start, content_end));
1927
1928 if can_borrow {
1929 Ok(LexedWordSegment::borrowed_with_spans(
1930 LexedWordSegmentKind::SingleQuoted,
1931 &self.input[content_start.offset..content_end.offset],
1932 content_span,
1933 wrapper_span,
1934 ))
1935 } else {
1936 Ok(LexedWordSegment::owned_with_spans(
1937 LexedWordSegmentKind::SingleQuoted,
1938 content,
1939 content_span,
1940 wrapper_span,
1941 ))
1942 }
1943 }
1944
1945 fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1946 let segment = match self.read_dollar_single_quoted_segment() {
1947 Ok(segment) => segment,
1948 Err(kind) => return Some(LexedToken::error(kind)),
1949 };
1950 let mut word = LexedWord::from_segment(segment);
1951 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1952 return Some(LexedToken::error(kind));
1953 }
1954
1955 let kind = if word.single_segment().is_some() {
1956 TokenKind::LiteralWord
1957 } else {
1958 TokenKind::Word
1959 };
1960
1961 Some(LexedToken::with_word_payload(kind, word))
1962 }
1963
1964 fn read_dollar_single_quoted_segment(
1965 &mut self,
1966 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1967 debug_assert_eq!(self.peek_char(), Some('$'));
1968 debug_assert_eq!(self.second_char(), Some('\''));
1969
1970 let wrapper_start = self.current_position();
1971 self.consume_ascii_chars(2); let content_start = self.current_position();
1973 let mut out = String::with_capacity(16);
1974
1975 while let Some(ch) = self.peek_char() {
1976 if ch == '\'' {
1977 let content_end = self.current_position();
1978 self.advance();
1979 let wrapper_span =
1980 Some(Span::from_positions(wrapper_start, self.current_position()));
1981 let content_span = Some(Span::from_positions(content_start, content_end));
1982 return Ok(LexedWordSegment::owned_with_spans(
1983 LexedWordSegmentKind::DollarSingleQuoted,
1984 out,
1985 content_span,
1986 wrapper_span,
1987 ));
1988 }
1989
1990 if ch == '\\' {
1991 self.advance();
1992 if let Some(esc) = self.peek_char() {
1993 self.advance();
1994 match esc {
1995 'n' => out.push('\n'),
1996 't' => out.push('\t'),
1997 'r' => out.push('\r'),
1998 'a' => out.push('\x07'),
1999 'b' => out.push('\x08'),
2000 'f' => out.push('\x0C'),
2001 'v' => out.push('\x0B'),
2002 'e' | 'E' => out.push('\x1B'),
2003 '\\' => out.push('\\'),
2004 '\'' => out.push('\''),
2005 '"' => out.push('"'),
2006 '?' => out.push('?'),
2007 'c' => {
2008 if let Some(control) = self.peek_char() {
2009 self.advance();
2010 out.push(((control as u32 & 0x1F) as u8) as char);
2011 } else {
2012 out.push('\\');
2013 out.push('c');
2014 }
2015 }
2016 'x' => {
2017 let mut hex = String::new();
2018 for _ in 0..2 {
2019 if let Some(h) = self.peek_char() {
2020 if h.is_ascii_hexdigit() {
2021 hex.push(h);
2022 self.advance();
2023 } else {
2024 break;
2025 }
2026 }
2027 }
2028 if let Ok(val) = u8::from_str_radix(&hex, 16) {
2029 out.push(val as char);
2030 }
2031 }
2032 'u' => {
2033 let mut hex = String::new();
2034 for _ in 0..4 {
2035 if let Some(h) = self.peek_char() {
2036 if h.is_ascii_hexdigit() {
2037 hex.push(h);
2038 self.advance();
2039 } else {
2040 break;
2041 }
2042 }
2043 }
2044 if let Ok(val) = u32::from_str_radix(&hex, 16)
2045 && let Some(c) = char::from_u32(val)
2046 {
2047 out.push(c);
2048 }
2049 }
2050 'U' => {
2051 let mut hex = String::new();
2052 for _ in 0..8 {
2053 if let Some(h) = self.peek_char() {
2054 if h.is_ascii_hexdigit() {
2055 hex.push(h);
2056 self.advance();
2057 } else {
2058 break;
2059 }
2060 }
2061 }
2062 if let Ok(val) = u32::from_str_radix(&hex, 16)
2063 && let Some(c) = char::from_u32(val)
2064 {
2065 out.push(c);
2066 }
2067 }
2068 '0'..='7' => {
2069 let mut oct = String::new();
2070 oct.push(esc);
2071 for _ in 0..2 {
2072 if let Some(o) = self.peek_char() {
2073 if o.is_ascii_digit() && o < '8' {
2074 oct.push(o);
2075 self.advance();
2076 } else {
2077 break;
2078 }
2079 }
2080 }
2081 if let Ok(val) = u8::from_str_radix(&oct, 8) {
2082 out.push(val as char);
2083 }
2084 }
2085 _ => {
2086 out.push('\\');
2087 out.push(esc);
2088 }
2089 }
2090 } else {
2091 out.push('\\');
2092 }
2093 continue;
2094 }
2095
2096 out.push(ch);
2097 self.advance();
2098 }
2099
2100 Err(LexerErrorKind::SingleQuote)
2101 }
2102
2103 fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2104 let start = self.current_position();
2105
2106 if self.reinject_buf.is_empty() {
2107 let ascii_len = self.source_ascii_plain_word_len();
2108 let chunk = if ascii_len > 0
2109 && self
2110 .cursor
2111 .rest()
2112 .as_bytes()
2113 .get(ascii_len)
2114 .is_none_or(|byte| byte.is_ascii())
2115 {
2116 self.consume_source_bytes(ascii_len);
2117 &self.input[start.offset..self.offset]
2118 } else {
2119 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2120 self.advance_scanned_source_bytes(chunk.len());
2121 chunk
2122 };
2123 if chunk.is_empty() {
2124 return None;
2125 }
2126
2127 let end = self.current_position();
2128 return Some(LexedWordSegment::borrowed(
2129 LexedWordSegmentKind::Plain,
2130 &self.input[start.offset..self.offset],
2131 Some(Span::from_positions(start, end)),
2132 ));
2133 }
2134
2135 let ch = self.peek_char()?;
2136 if !Self::is_plain_word_char(ch) {
2137 return None;
2138 }
2139
2140 let mut text = String::with_capacity(16);
2141 while let Some(ch) = self.peek_char() {
2142 if !Self::is_plain_word_char(ch) {
2143 break;
2144 }
2145 text.push(ch);
2146 self.advance();
2147 }
2148
2149 Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2150 }
2151
2152 fn append_segmented_continuation(
2155 &mut self,
2156 word: &mut LexedWord<'a>,
2157 ) -> Result<(), LexerErrorKind> {
2158 loop {
2159 match self.peek_char() {
2160 Some('\\') if self.second_char() == Some('\n') => {
2161 self.advance();
2162 self.advance();
2163 continue;
2164 }
2165 Some('\'') => {
2166 word.push_segment(self.read_single_quoted_segment()?);
2167 }
2168 Some('"') => {
2169 word.push_segment(self.read_double_quoted_segment()?);
2170 }
2171 Some('$') if self.second_char() == Some('\'') => {
2172 word.push_segment(self.read_dollar_single_quoted_segment()?);
2173 }
2174 Some('$') if self.second_char() == Some('"') => {
2175 word.push_segment(self.read_dollar_double_quoted_segment()?);
2176 }
2177 Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2178 let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2179 unreachable!("peeked '(' should produce a suffix segment");
2180 };
2181 word.push_segment(segment);
2182 }
2183 _ => {
2184 if let Some(segment) = self.read_plain_continuation_segment() {
2185 word.push_segment(segment);
2186 continue;
2187 }
2188
2189 let start = self.current_position();
2190 let plain = self.read_unquoted_segment(start)?;
2191 if plain.as_str().is_empty() {
2192 break;
2193 }
2194 word.push_segment(plain);
2195 }
2196 }
2197 }
2198
2199 Ok(())
2200 }
2201
2202 fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2203 debug_assert_eq!(self.peek_char(), Some('('));
2204
2205 let start = self.current_position();
2206 let mut depth = 0usize;
2207 let mut escaped = false;
2208 let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2209
2210 while let Some(ch) = self.peek_char() {
2211 if let Some(text) = text.as_mut() {
2212 text.push(ch);
2213 }
2214 self.advance();
2215
2216 if escaped {
2217 escaped = false;
2218 continue;
2219 }
2220
2221 match ch {
2222 '\\' => escaped = true,
2223 '(' => depth += 1,
2224 ')' => {
2225 depth = depth.saturating_sub(1);
2226 if depth == 0 {
2227 break;
2228 }
2229 }
2230 _ => {}
2231 }
2232 }
2233
2234 let end = self.current_position();
2235 let span = Some(Span::from_positions(start, end));
2236 if let Some(text) = text {
2237 Some(LexedWordSegment::owned_with_spans(
2238 LexedWordSegmentKind::Plain,
2239 text,
2240 span,
2241 span,
2242 ))
2243 } else {
2244 Some(LexedWordSegment::borrowed_with_spans(
2245 LexedWordSegmentKind::Plain,
2246 &self.input[start.offset..end.offset],
2247 span,
2248 span,
2249 ))
2250 }
2251 }
2252
2253 fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2254 self.read_double_quoted_word(false)
2255 }
2256
2257 fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2258 self.read_double_quoted_word(true)
2259 }
2260
2261 fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2262 let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2263 Ok(segment) => segment,
2264 Err(kind) => return Some(LexedToken::error(kind)),
2265 };
2266 let mut word = LexedWord::from_segment(segment);
2267 if let Err(kind) = self.append_segmented_continuation(&mut word) {
2268 return Some(LexedToken::error(kind));
2269 }
2270
2271 let kind = if word.single_segment().is_some() {
2272 TokenKind::QuotedWord
2273 } else {
2274 TokenKind::Word
2275 };
2276
2277 Some(LexedToken::with_word_payload(kind, word))
2278 }
2279
2280 fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2281 self.read_double_quoted_segment_with_dollar(false)
2282 }
2283
2284 fn read_dollar_double_quoted_segment(
2285 &mut self,
2286 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2287 self.read_double_quoted_segment_with_dollar(true)
2288 }
2289
2290 fn read_double_quoted_segment_with_dollar(
2291 &mut self,
2292 dollar: bool,
2293 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2294 if dollar {
2295 debug_assert_eq!(self.peek_char(), Some('$'));
2296 debug_assert_eq!(self.second_char(), Some('"'));
2297 } else {
2298 debug_assert_eq!(self.peek_char(), Some('"'));
2299 }
2300
2301 let wrapper_start = self.current_position();
2302 if dollar {
2303 self.consume_ascii_chars(2); } else {
2305 self.consume_ascii_chars(1); }
2307 let content_start = self.current_position();
2308 let mut content_end = content_start;
2309 let mut simple = self.reinject_buf.is_empty();
2310 let mut borrowable = self.reinject_buf.is_empty();
2311 let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2312 let mut closed = false;
2313
2314 while let Some(ch) = self.peek_char() {
2315 if simple {
2316 if self.reinject_buf.is_empty() {
2317 let rest = self.cursor.rest();
2318 match Self::find_double_quote_special(rest) {
2319 Some(index) if index > 0 => {
2320 self.consume_source_bytes(index);
2321 continue;
2322 }
2323 None => {
2324 self.consume_source_bytes(rest.len());
2325 return Err(LexerErrorKind::DoubleQuote);
2326 }
2327 _ => {}
2328 }
2329 }
2330
2331 match ch {
2332 '"' => {
2333 content_end = self.current_position();
2334 self.consume_ascii_chars(1); closed = true;
2336 break;
2337 }
2338 '\\' | '$' | '`' => {
2339 simple = false;
2340 if ch == '`' {
2341 borrowable = false;
2342 let capture_end = self.current_position();
2343 self.ensure_capture_from_source(
2344 &mut content,
2345 content_start,
2346 capture_end,
2347 );
2348 }
2349 }
2350 _ => {
2351 self.advance();
2352 }
2353 }
2354 if simple {
2355 continue;
2356 }
2357 }
2358
2359 match ch {
2360 '"' => {
2361 if borrowable {
2362 content_end = self.current_position();
2363 }
2364 self.consume_ascii_chars(1); closed = true;
2366 break;
2367 }
2368 '\\' => {
2369 let escape_start = self.current_position();
2370 self.advance();
2371 if let Some(next) = self.peek_char() {
2372 match next {
2373 '\n' => {
2374 borrowable = false;
2375 self.ensure_capture_from_source(
2376 &mut content,
2377 content_start,
2378 escape_start,
2379 );
2380 self.advance();
2381 }
2382 '$' => {
2383 borrowable = false;
2384 self.ensure_capture_from_source(
2385 &mut content,
2386 content_start,
2387 escape_start,
2388 );
2389 Self::push_capture_char(&mut content, '\x00');
2390 Self::push_capture_char(&mut content, '$');
2391 self.advance();
2392 }
2393 '"' | '\\' | '`' => {
2394 borrowable = false;
2395 self.ensure_capture_from_source(
2396 &mut content,
2397 content_start,
2398 escape_start,
2399 );
2400 if next == '\\' {
2401 Self::push_capture_char(&mut content, '\x00');
2402 }
2403 if next == '`' {
2404 Self::push_capture_char(&mut content, '\x00');
2405 }
2406 Self::push_capture_char(&mut content, next);
2407 self.advance();
2408 content_end = self.current_position();
2409 }
2410 _ => {
2411 Self::push_capture_char(&mut content, '\\');
2412 Self::push_capture_char(&mut content, next);
2413 self.advance();
2414 content_end = self.current_position();
2415 }
2416 }
2417 }
2418 }
2419 '$' => {
2420 Self::push_capture_char(&mut content, '$');
2421 self.advance();
2422 if self.peek_char() == Some('(') {
2423 if self.second_char() == Some('(') {
2424 self.read_arithmetic_expansion_into(&mut content);
2425 } else {
2426 Self::push_capture_char(&mut content, '(');
2427 self.advance();
2428 self.read_command_subst_into(&mut content);
2429 }
2430 } else if self.peek_char() == Some('{') {
2431 Self::push_capture_char(&mut content, '{');
2432 self.advance();
2433 borrowable &= self.read_param_expansion_into(&mut content, content_start);
2434 }
2435 content_end = self.current_position();
2436 }
2437 '`' => {
2438 borrowable = false;
2439 let capture_end = self.current_position();
2440 self.ensure_capture_from_source(&mut content, content_start, capture_end);
2441 Self::push_capture_char(&mut content, '`');
2442 self.advance(); while let Some(c) = self.peek_char() {
2444 Self::push_capture_char(&mut content, c);
2445 self.advance();
2446 if c == '`' {
2447 break;
2448 }
2449 if c == '\\'
2450 && let Some(next) = self.peek_char()
2451 {
2452 Self::push_capture_char(&mut content, next);
2453 self.advance();
2454 }
2455 }
2456 content_end = self.current_position();
2457 }
2458 _ => {
2459 Self::push_capture_char(&mut content, ch);
2460 self.advance();
2461 content_end = self.current_position();
2462 }
2463 }
2464 }
2465
2466 if !closed {
2467 return Err(LexerErrorKind::DoubleQuote);
2468 }
2469
2470 let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
2471 let content_span = Some(Span::from_positions(content_start, content_end));
2472
2473 if borrowable {
2474 Ok(LexedWordSegment::borrowed_with_spans(
2475 if dollar {
2476 LexedWordSegmentKind::DollarDoubleQuoted
2477 } else {
2478 LexedWordSegmentKind::DoubleQuoted
2479 },
2480 &self.input[content_start.offset..content_end.offset],
2481 content_span,
2482 wrapper_span,
2483 ))
2484 } else {
2485 Ok(LexedWordSegment::owned_with_spans(
2486 if dollar {
2487 LexedWordSegmentKind::DollarDoubleQuoted
2488 } else {
2489 LexedWordSegmentKind::DoubleQuoted
2490 },
2491 content.unwrap_or_default(),
2492 content_span,
2493 wrapper_span,
2494 ))
2495 }
2496 }
2497
2498 fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2499 debug_assert_eq!(self.peek_char(), Some('('));
2500 debug_assert_eq!(self.second_char(), Some('('));
2501
2502 Self::push_capture_char(content, '(');
2503 self.advance();
2504 Self::push_capture_char(content, '(');
2505 self.advance();
2506
2507 let mut depth = 2;
2508 while let Some(c) = self.peek_char() {
2509 match c {
2510 '\\' => {
2511 Self::push_capture_char(content, c);
2512 self.advance();
2513 if let Some(next) = self.peek_char() {
2514 Self::push_capture_char(content, next);
2515 self.advance();
2516 }
2517 }
2518 '\'' => {
2519 Self::push_capture_char(content, c);
2520 self.advance();
2521 while let Some(quoted) = self.peek_char() {
2522 Self::push_capture_char(content, quoted);
2523 self.advance();
2524 if quoted == '\'' {
2525 break;
2526 }
2527 }
2528 }
2529 '"' => {
2530 let mut escaped = false;
2531 Self::push_capture_char(content, c);
2532 self.advance();
2533 while let Some(quoted) = self.peek_char() {
2534 Self::push_capture_char(content, quoted);
2535 self.advance();
2536 if escaped {
2537 escaped = false;
2538 continue;
2539 }
2540 match quoted {
2541 '\\' => escaped = true,
2542 '"' => break,
2543 _ => {}
2544 }
2545 }
2546 }
2547 '`' => {
2548 let mut escaped = false;
2549 Self::push_capture_char(content, c);
2550 self.advance();
2551 while let Some(quoted) = self.peek_char() {
2552 Self::push_capture_char(content, quoted);
2553 self.advance();
2554 if escaped {
2555 escaped = false;
2556 continue;
2557 }
2558 match quoted {
2559 '\\' => escaped = true,
2560 '`' => break,
2561 _ => {}
2562 }
2563 }
2564 }
2565 '(' => {
2566 Self::push_capture_char(content, c);
2567 self.advance();
2568 depth += 1;
2569 }
2570 ')' => {
2571 Self::push_capture_char(content, c);
2572 self.advance();
2573 depth -= 1;
2574 if depth == 0 {
2575 return true;
2576 }
2577 }
2578 _ => {
2579 Self::push_capture_char(content, c);
2580 self.advance();
2581 }
2582 }
2583 }
2584
2585 false
2586 }
2587
2588 fn read_legacy_arithmetic_into(
2589 &mut self,
2590 content: &mut Option<String>,
2591 segment_start: Position,
2592 ) -> bool {
2593 let mut bracket_depth = 1;
2594
2595 while let Some(c) = self.peek_char() {
2596 match c {
2597 '\\' => {
2598 Self::push_capture_char(content, c);
2599 self.advance();
2600 if let Some(next) = self.peek_char() {
2601 Self::push_capture_char(content, next);
2602 self.advance();
2603 }
2604 }
2605 '\'' => {
2606 Self::push_capture_char(content, c);
2607 self.advance();
2608 while let Some(quoted) = self.peek_char() {
2609 Self::push_capture_char(content, quoted);
2610 self.advance();
2611 if quoted == '\'' {
2612 break;
2613 }
2614 }
2615 }
2616 '"' => {
2617 let mut escaped = false;
2618 Self::push_capture_char(content, c);
2619 self.advance();
2620 while let Some(quoted) = self.peek_char() {
2621 Self::push_capture_char(content, quoted);
2622 self.advance();
2623 if escaped {
2624 escaped = false;
2625 continue;
2626 }
2627 match quoted {
2628 '\\' => escaped = true,
2629 '"' => break,
2630 _ => {}
2631 }
2632 }
2633 }
2634 '`' => {
2635 let mut escaped = false;
2636 Self::push_capture_char(content, c);
2637 self.advance();
2638 while let Some(quoted) = self.peek_char() {
2639 Self::push_capture_char(content, quoted);
2640 self.advance();
2641 if escaped {
2642 escaped = false;
2643 continue;
2644 }
2645 match quoted {
2646 '\\' => escaped = true,
2647 '`' => break,
2648 _ => {}
2649 }
2650 }
2651 }
2652 '[' => {
2653 Self::push_capture_char(content, c);
2654 self.advance();
2655 bracket_depth += 1;
2656 }
2657 ']' => {
2658 Self::push_capture_char(content, c);
2659 self.advance();
2660 bracket_depth -= 1;
2661 if bracket_depth == 0 {
2662 return true;
2663 }
2664 }
2665 '$' => {
2666 Self::push_capture_char(content, c);
2667 self.advance();
2668 if self.peek_char() == Some('(') {
2669 if self.second_char() == Some('(') {
2670 if !self.read_arithmetic_expansion_into(content) {
2671 return false;
2672 }
2673 } else {
2674 Self::push_capture_char(content, '(');
2675 self.advance();
2676 if !self.read_command_subst_into(content) {
2677 return false;
2678 }
2679 }
2680 } else if self.peek_char() == Some('{') {
2681 Self::push_capture_char(content, '{');
2682 self.advance();
2683 if !self.read_param_expansion_into(content, segment_start) {
2684 return false;
2685 }
2686 } else if self.peek_char() == Some('[') {
2687 Self::push_capture_char(content, '[');
2688 self.advance();
2689 if !self.read_legacy_arithmetic_into(content, segment_start) {
2690 return false;
2691 }
2692 }
2693 }
2694 _ => {
2695 Self::push_capture_char(content, c);
2696 self.advance();
2697 }
2698 }
2699 }
2700
2701 false
2702 }
2703
2704 fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
2708 self.read_command_subst_into_depth(content, 0)
2709 }
2710
2711 fn flush_command_subst_keyword(
2712 current_word: &mut String,
2713 pending_case_headers: &mut usize,
2714 case_clause_depths: &mut SmallVec<[usize; 4]>,
2715 depth: usize,
2716 word_started_at_command_start: &mut bool,
2717 ) {
2718 if current_word.is_empty() {
2719 *word_started_at_command_start = false;
2720 return;
2721 }
2722
2723 match current_word.as_str() {
2724 "case" if *word_started_at_command_start => *pending_case_headers += 1,
2725 "in" if *pending_case_headers > 0 => {
2726 *pending_case_headers -= 1;
2727 case_clause_depths.push(depth);
2728 }
2729 "esac" if *word_started_at_command_start => {
2730 case_clause_depths.pop();
2731 }
2732 _ => {}
2733 }
2734
2735 current_word.clear();
2736 *word_started_at_command_start = false;
2737 }
2738
2739 fn read_command_subst_heredoc_delimiter_into(
2740 &mut self,
2741 content: &mut Option<String>,
2742 ) -> Option<String> {
2743 while let Some(ch) = self.peek_char() {
2744 if !matches!(ch, ' ' | '\t') {
2745 break;
2746 }
2747 Self::push_capture_char(content, ch);
2748 self.advance();
2749 }
2750
2751 let mut cooked = String::new();
2752 let mut in_single = false;
2753 let mut in_double = false;
2754 let mut escaped = false;
2755 let mut saw_any = false;
2756
2757 while let Some(ch) = self.peek_char() {
2758 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2759 break;
2760 }
2761
2762 saw_any = true;
2763 Self::push_capture_char(content, ch);
2764 self.advance();
2765
2766 if escaped {
2767 cooked.push(ch);
2768 escaped = false;
2769 continue;
2770 }
2771
2772 match ch {
2773 '\\' if !in_single => escaped = true,
2774 '\'' if !in_double => in_single = !in_single,
2775 '"' if !in_single => in_double = !in_double,
2776 _ => cooked.push(ch),
2777 }
2778 }
2779
2780 saw_any.then_some(cooked)
2781 }
2782
2783 fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2784 Self::push_capture_char(content, '`');
2785 self.advance();
2786 while let Some(ch) = self.peek_char() {
2787 Self::push_capture_char(content, ch);
2788 self.advance();
2789 if ch == '\\' {
2790 if let Some(esc) = self.peek_char() {
2791 Self::push_capture_char(content, esc);
2792 self.advance();
2793 }
2794 continue;
2795 }
2796 if ch == '`' {
2797 break;
2798 }
2799 }
2800 }
2801
2802 fn read_command_subst_pending_heredoc_into(
2803 &mut self,
2804 content: &mut Option<String>,
2805 delimiter: &str,
2806 strip_tabs: bool,
2807 ) -> bool {
2808 loop {
2809 let mut line = String::new();
2810 let mut saw_newline = false;
2811
2812 while let Some(ch) = self.peek_char() {
2813 self.advance();
2814 if ch == '\n' {
2815 saw_newline = true;
2816 break;
2817 }
2818 line.push(ch);
2819 }
2820
2821 Self::push_capture_str(content, &line);
2822 if saw_newline {
2823 Self::push_capture_char(content, '\n');
2824 }
2825
2826 if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2827 return true;
2828 }
2829 }
2830 }
2831
2832 fn read_command_subst_into_depth(
2833 &mut self,
2834 content: &mut Option<String>,
2835 subst_depth: usize,
2836 ) -> bool {
2837 if subst_depth >= self.max_subst_depth {
2838 let mut depth = 1;
2840 while let Some(c) = self.peek_char() {
2841 self.advance();
2842 match c {
2843 '(' => depth += 1,
2844 ')' => {
2845 depth -= 1;
2846 if depth == 0 {
2847 Self::push_capture_char(content, ')');
2848 return true;
2849 }
2850 }
2851 _ => {}
2852 }
2853 }
2854 return false;
2855 }
2856
2857 let mut depth = 1;
2858 let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
2859 let mut pending_case_headers = 0usize;
2860 let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
2861 let mut current_word = String::with_capacity(16);
2862 let mut at_command_start = true;
2863 let mut expecting_redirection_target = false;
2864 let mut current_word_started_at_command_start = false;
2865 while let Some(c) = self.peek_char() {
2866 match c {
2867 '#' if !self.should_treat_hash_as_word_char() => {
2868 let had_word = !current_word.is_empty();
2869 Self::flush_command_subst_keyword(
2870 &mut current_word,
2871 &mut pending_case_headers,
2872 &mut case_clause_depths,
2873 depth,
2874 &mut current_word_started_at_command_start,
2875 );
2876 if had_word && expecting_redirection_target {
2877 expecting_redirection_target = false;
2878 }
2879 Self::push_capture_char(content, '#');
2880 self.advance();
2881 while let Some(comment_ch) = self.peek_char() {
2882 Self::push_capture_char(content, comment_ch);
2883 self.advance();
2884 if comment_ch == '\n' {
2885 for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
2886 if !self.read_command_subst_pending_heredoc_into(
2887 content, &delimiter, strip_tabs,
2888 ) {
2889 return false;
2890 }
2891 }
2892 at_command_start = true;
2893 expecting_redirection_target = false;
2894 break;
2895 }
2896 }
2897 }
2898 '(' => {
2899 Self::flush_command_subst_keyword(
2900 &mut current_word,
2901 &mut pending_case_headers,
2902 &mut case_clause_depths,
2903 depth,
2904 &mut current_word_started_at_command_start,
2905 );
2906 depth += 1;
2907 Self::push_capture_char(content, c);
2908 self.advance();
2909 at_command_start = true;
2910 expecting_redirection_target = false;
2911 }
2912 ')' => {
2913 Self::flush_command_subst_keyword(
2914 &mut current_word,
2915 &mut pending_case_headers,
2916 &mut case_clause_depths,
2917 depth,
2918 &mut current_word_started_at_command_start,
2919 );
2920 if case_clause_depths
2921 .last()
2922 .is_some_and(|case_depth| *case_depth == depth)
2923 {
2924 Self::push_capture_char(content, ')');
2925 self.advance();
2926 at_command_start = true;
2927 expecting_redirection_target = false;
2928 continue;
2929 }
2930 depth -= 1;
2931 self.advance();
2932 if depth == 0 {
2933 Self::push_capture_char(content, ')');
2934 return true;
2935 }
2936 Self::push_capture_char(content, c);
2937 at_command_start = false;
2938 expecting_redirection_target = false;
2939 }
2940 '"' => {
2941 let had_word = !current_word.is_empty();
2942 Self::flush_command_subst_keyword(
2943 &mut current_word,
2944 &mut pending_case_headers,
2945 &mut case_clause_depths,
2946 depth,
2947 &mut current_word_started_at_command_start,
2948 );
2949 if had_word && expecting_redirection_target {
2950 expecting_redirection_target = false;
2951 }
2952 Self::push_capture_char(content, '"');
2954 self.advance();
2955 while let Some(qc) = self.peek_char() {
2956 match qc {
2957 '"' => {
2958 Self::push_capture_char(content, '"');
2959 self.advance();
2960 break;
2961 }
2962 '\\' => {
2963 Self::push_capture_char(content, '\\');
2964 self.advance();
2965 if let Some(esc) = self.peek_char() {
2966 Self::push_capture_char(content, esc);
2967 self.advance();
2968 }
2969 }
2970 '$' => {
2971 Self::push_capture_char(content, '$');
2972 self.advance();
2973 if self.peek_char() == Some('(') {
2974 if self.second_char() == Some('(') {
2975 if !self.read_arithmetic_expansion_into(content) {
2976 return false;
2977 }
2978 } else {
2979 Self::push_capture_char(content, '(');
2980 self.advance();
2981 if !self
2982 .read_command_subst_into_depth(content, subst_depth + 1)
2983 {
2984 return false;
2985 }
2986 }
2987 }
2988 }
2989 _ => {
2990 Self::push_capture_char(content, qc);
2991 self.advance();
2992 }
2993 }
2994 }
2995 if expecting_redirection_target {
2996 expecting_redirection_target = false;
2997 } else {
2998 at_command_start = false;
2999 }
3000 }
3001 '\'' => {
3002 let had_word = !current_word.is_empty();
3003 Self::flush_command_subst_keyword(
3004 &mut current_word,
3005 &mut pending_case_headers,
3006 &mut case_clause_depths,
3007 depth,
3008 &mut current_word_started_at_command_start,
3009 );
3010 if had_word && expecting_redirection_target {
3011 expecting_redirection_target = false;
3012 }
3013 Self::push_capture_char(content, '\'');
3015 self.advance();
3016 while let Some(qc) = self.peek_char() {
3017 Self::push_capture_char(content, qc);
3018 self.advance();
3019 if qc == '\'' {
3020 break;
3021 }
3022 }
3023 if expecting_redirection_target {
3024 expecting_redirection_target = false;
3025 } else {
3026 at_command_start = false;
3027 }
3028 }
3029 '`' => {
3030 let had_word = !current_word.is_empty();
3031 Self::flush_command_subst_keyword(
3032 &mut current_word,
3033 &mut pending_case_headers,
3034 &mut case_clause_depths,
3035 depth,
3036 &mut current_word_started_at_command_start,
3037 );
3038 if had_word && expecting_redirection_target {
3039 expecting_redirection_target = false;
3040 }
3041 self.read_command_subst_backtick_segment_into(content);
3042 if expecting_redirection_target {
3043 expecting_redirection_target = false;
3044 } else {
3045 at_command_start = false;
3046 }
3047 }
3048 '$' if self.second_char() == Some('\'') => {
3049 let had_word = !current_word.is_empty();
3050 Self::flush_command_subst_keyword(
3051 &mut current_word,
3052 &mut pending_case_headers,
3053 &mut case_clause_depths,
3054 depth,
3055 &mut current_word_started_at_command_start,
3056 );
3057 if had_word && expecting_redirection_target {
3058 expecting_redirection_target = false;
3059 }
3060 Self::push_capture_char(content, '$');
3061 self.advance();
3062 Self::push_capture_char(content, '\'');
3063 self.advance();
3064 while let Some(qc) = self.peek_char() {
3065 Self::push_capture_char(content, qc);
3066 self.advance();
3067 if qc == '\\' {
3068 if let Some(esc) = self.peek_char() {
3069 Self::push_capture_char(content, esc);
3070 self.advance();
3071 }
3072 continue;
3073 }
3074 if qc == '\'' {
3075 break;
3076 }
3077 }
3078 if expecting_redirection_target {
3079 expecting_redirection_target = false;
3080 } else {
3081 at_command_start = false;
3082 }
3083 }
3084 '\\' => {
3085 let had_word = !current_word.is_empty();
3086 Self::flush_command_subst_keyword(
3087 &mut current_word,
3088 &mut pending_case_headers,
3089 &mut case_clause_depths,
3090 depth,
3091 &mut current_word_started_at_command_start,
3092 );
3093 if had_word && expecting_redirection_target {
3094 expecting_redirection_target = false;
3095 }
3096 Self::push_capture_char(content, '\\');
3097 self.advance();
3098 if let Some(esc) = self.peek_char() {
3099 Self::push_capture_char(content, esc);
3100 self.advance();
3101 }
3102 if expecting_redirection_target {
3103 expecting_redirection_target = false;
3104 } else {
3105 at_command_start = false;
3106 }
3107 }
3108 '<' if self.second_char() == Some('<') => {
3109 let word_was_redirection_fd = current_word_started_at_command_start
3110 && !current_word.is_empty()
3111 && current_word.chars().all(|current| current.is_ascii_digit());
3112 Self::flush_command_subst_keyword(
3113 &mut current_word,
3114 &mut pending_case_headers,
3115 &mut case_clause_depths,
3116 depth,
3117 &mut current_word_started_at_command_start,
3118 );
3119 if word_was_redirection_fd {
3120 at_command_start = true;
3121 }
3122
3123 Self::push_capture_char(content, '<');
3124 self.advance();
3125 Self::push_capture_char(content, '<');
3126 self.advance();
3127
3128 if self.peek_char() == Some('<') {
3129 Self::push_capture_char(content, '<');
3130 self.advance();
3131 expecting_redirection_target = true;
3132 continue;
3133 }
3134
3135 let strip_tabs = if self.peek_char() == Some('-') {
3136 Self::push_capture_char(content, '-');
3137 self.advance();
3138 true
3139 } else {
3140 false
3141 };
3142
3143 if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
3144 {
3145 pending_heredocs.push((delimiter, strip_tabs));
3146 expecting_redirection_target = false;
3147 } else {
3148 expecting_redirection_target = true;
3149 }
3150 }
3151 '>' | '<' => {
3152 let word_was_redirection_fd = current_word_started_at_command_start
3153 && !current_word.is_empty()
3154 && current_word.chars().all(|current| current.is_ascii_digit());
3155 Self::flush_command_subst_keyword(
3156 &mut current_word,
3157 &mut pending_case_headers,
3158 &mut case_clause_depths,
3159 depth,
3160 &mut current_word_started_at_command_start,
3161 );
3162 if word_was_redirection_fd {
3163 at_command_start = true;
3164 }
3165 Self::push_capture_char(content, c);
3166 self.advance();
3167 expecting_redirection_target = true;
3168 }
3169 '\n' => {
3170 Self::flush_command_subst_keyword(
3171 &mut current_word,
3172 &mut pending_case_headers,
3173 &mut case_clause_depths,
3174 depth,
3175 &mut current_word_started_at_command_start,
3176 );
3177 Self::push_capture_char(content, '\n');
3178 self.advance();
3179 for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
3180 if !self.read_command_subst_pending_heredoc_into(
3181 content, &delimiter, strip_tabs,
3182 ) {
3183 return false;
3184 }
3185 }
3186 at_command_start = true;
3187 expecting_redirection_target = false;
3188 }
3189 _ => {
3190 if c.is_ascii_alphanumeric() || c == '_' {
3191 if current_word.is_empty()
3192 && !expecting_redirection_target
3193 && at_command_start
3194 {
3195 current_word_started_at_command_start = true;
3196 at_command_start = false;
3197 }
3198 current_word.push(c);
3199 } else {
3200 let had_word = !current_word.is_empty();
3201 Self::flush_command_subst_keyword(
3202 &mut current_word,
3203 &mut pending_case_headers,
3204 &mut case_clause_depths,
3205 depth,
3206 &mut current_word_started_at_command_start,
3207 );
3208 if had_word && expecting_redirection_target {
3209 expecting_redirection_target = false;
3210 }
3211 match c {
3212 ' ' | '\t' => {}
3213 ';' | '|' | '&' => {
3214 at_command_start = true;
3215 expecting_redirection_target = false;
3216 }
3217 _ => {
3218 if !expecting_redirection_target {
3219 at_command_start = false;
3220 }
3221 }
3222 }
3223 }
3224 Self::push_capture_char(content, c);
3225 self.advance();
3226 }
3227 }
3228 }
3229
3230 false
3231 }
3232
3233 fn read_param_expansion_into(
3237 &mut self,
3238 content: &mut Option<String>,
3239 segment_start: Position,
3240 ) -> bool {
3241 let mut borrowable = true;
3242 let mut depth = 1;
3243 let mut literal_brace_depth = 0usize;
3244 let mut in_single = false;
3245 let mut in_double = false;
3246 let mut double_quote_depth = 0usize;
3247 while let Some(c) = self.peek_char() {
3248 if in_single {
3249 match c {
3250 '\\' => {
3251 let escape_start = self.current_position();
3252 if self.second_char() == Some('"') {
3253 self.advance();
3254 borrowable = false;
3255 self.ensure_capture_from_source(content, segment_start, escape_start);
3256 Self::push_capture_char(content, '"');
3257 self.advance();
3258 } else {
3259 Self::push_capture_char(content, '\\');
3260 self.advance();
3261 }
3262 }
3263 '\'' => {
3264 Self::push_capture_char(content, c);
3265 self.advance();
3266 in_single = false;
3267 }
3268 _ => {
3269 Self::push_capture_char(content, c);
3270 self.advance();
3271 }
3272 }
3273 continue;
3274 }
3275
3276 match c {
3277 '}' if !in_single && (!in_double || depth > double_quote_depth) => {
3278 self.advance();
3279 Self::push_capture_char(content, '}');
3280 if depth == 1
3281 && literal_brace_depth > 0
3282 && self.has_later_top_level_param_expansion_closer(depth)
3283 {
3284 literal_brace_depth -= 1;
3285 continue;
3286 }
3287 depth -= 1;
3288 if depth == 0 {
3289 break;
3290 }
3291 }
3292 '{' if !in_single && !in_double => {
3293 literal_brace_depth += 1;
3294 Self::push_capture_char(content, '{');
3295 self.advance();
3296 }
3297 '"' => {
3298 Self::push_capture_char(content, '"');
3300 self.advance();
3301 in_double = !in_double;
3302 double_quote_depth = if in_double { depth } else { 0 };
3303 }
3304 '\'' => {
3305 Self::push_capture_char(content, '\'');
3306 self.advance();
3307 if !in_double {
3308 in_single = true;
3309 }
3310 }
3311 '\\' => {
3312 let escape_start = self.current_position();
3315 self.advance();
3316 if let Some(esc) = self.peek_char() {
3317 match esc {
3318 '$' => {
3319 borrowable = false;
3320 self.ensure_capture_from_source(
3321 content,
3322 segment_start,
3323 escape_start,
3324 );
3325 Self::push_capture_char(content, '\x00');
3326 Self::push_capture_char(content, '$');
3327 self.advance();
3328 }
3329 '"' | '\\' | '`' => {
3330 borrowable = false;
3331 self.ensure_capture_from_source(
3332 content,
3333 segment_start,
3334 escape_start,
3335 );
3336 Self::push_capture_char(content, esc);
3337 self.advance();
3338 }
3339 '}' => {
3340 Self::push_capture_char(content, '\\');
3342 Self::push_capture_char(content, '}');
3343 self.advance();
3344 literal_brace_depth = literal_brace_depth.saturating_sub(1);
3345 }
3346 _ => {
3347 Self::push_capture_char(content, '\\');
3348 Self::push_capture_char(content, esc);
3349 self.advance();
3350 }
3351 }
3352 } else {
3353 Self::push_capture_char(content, '\\');
3354 }
3355 }
3356 '$' => {
3357 Self::push_capture_char(content, '$');
3358 self.advance();
3359 if self.peek_char() == Some('(') {
3360 if self.second_char() == Some('(') {
3361 if !self.read_arithmetic_expansion_into(content) {
3362 borrowable = false;
3363 }
3364 } else {
3365 Self::push_capture_char(content, '(');
3366 self.advance();
3367 self.read_command_subst_into(content);
3368 }
3369 } else if self.peek_char() == Some('{') {
3370 Self::push_capture_char(content, '{');
3371 self.advance();
3372 borrowable &= self.read_param_expansion_into(content, segment_start);
3373 }
3374 }
3375 _ => {
3376 Self::push_capture_char(content, c);
3377 self.advance();
3378 }
3379 }
3380 }
3381 borrowable
3382 }
3383
3384 fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
3385 let mut chars = self.lookahead_chars().peekable();
3386 let mut depth = target_depth;
3387 let mut in_single = false;
3388 let mut in_double = false;
3389 let mut double_quote_depth = 0usize;
3390
3391 while let Some(ch) = chars.next() {
3392 if in_single {
3393 match ch {
3394 '\'' => in_single = false,
3395 '\\' if chars.peek() == Some(&'"') => {
3396 chars.next();
3397 }
3398 '\\' => {}
3399 _ => {}
3400 }
3401 continue;
3402 }
3403
3404 if in_double {
3405 match ch {
3406 '"' => {
3407 in_double = false;
3408 double_quote_depth = 0;
3409 }
3410 '\\' => {
3411 chars.next();
3412 }
3413 '$' if chars.peek() == Some(&'{') => {
3414 chars.next();
3415 depth += 1;
3416 }
3417 '}' if depth > double_quote_depth => {
3418 depth -= 1;
3419 }
3420 _ => {}
3421 }
3422 continue;
3423 }
3424
3425 match ch {
3426 '\n' if depth == target_depth => return false,
3427 '\'' => in_single = true,
3428 '"' => {
3429 in_double = true;
3430 double_quote_depth = depth;
3431 }
3432 '\\' => {
3433 chars.next();
3434 }
3435 '$' if chars.peek() == Some(&'{') => {
3436 chars.next();
3437 depth += 1;
3438 }
3439 '}' => {
3440 if depth == target_depth {
3441 return true;
3442 }
3443 depth -= 1;
3444 }
3445 _ => {}
3446 }
3447 }
3448
3449 false
3450 }
3451
3452 fn looks_like_brace_expansion(&self) -> bool {
3458 const MAX_LOOKAHEAD: usize = 10_000;
3459
3460 let mut chars = self.lookahead_chars();
3461
3462 if chars.next() != Some('{') {
3464 return false;
3465 }
3466
3467 let mut depth = 1;
3468 let mut paren_depth = 0usize;
3469 let mut has_comma = false;
3470 let mut has_dot_dot = false;
3471 let mut escaped = false;
3472 let mut in_single = false;
3473 let mut in_double = false;
3474 let mut in_backtick = false;
3475 let mut prev_char = None;
3476 let mut scanned = 0usize;
3477
3478 for ch in chars {
3479 scanned += 1;
3480 if scanned > MAX_LOOKAHEAD {
3481 return false;
3482 }
3483
3484 let brace_surface_active = !in_single && !in_double && !in_backtick;
3485 let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;
3486
3487 match ch {
3488 _ if escaped => {
3489 escaped = false;
3490 }
3491 '\\' if !in_single => escaped = true,
3492 '\'' if !in_double && !in_backtick => in_single = !in_single,
3493 '"' if !in_single && !in_backtick => in_double = !in_double,
3494 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3495 '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
3496 paren_depth += 1
3497 }
3498 ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
3499 '{' if !in_single && !in_double && !in_backtick => depth += 1,
3500 '}' if !in_single && !in_double && !in_backtick => {
3501 depth -= 1;
3502 if depth == 0 {
3503 return has_comma || has_dot_dot;
3505 }
3506 }
3507 ',' if at_top_level => has_comma = true,
3508 '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
3509 ' ' | '\t' | '\n' | ';' if at_top_level => return false,
3511 _ => {}
3512 }
3513 prev_char = Some(ch);
3514 }
3515
3516 false
3517 }
3518
3519 fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
3520 let mut brace_depth = 1usize;
3521 let mut paren_depth = 0usize;
3522 let mut escaped = false;
3523 let mut in_single = false;
3524 let mut in_double = false;
3525 let mut in_backtick = false;
3526 let mut prev_char = None;
3527
3528 while let Some(ch) = self.peek_char() {
3529 Self::push_capture_char(word, ch);
3530 self.advance();
3531
3532 if escaped {
3533 escaped = false;
3534 prev_char = Some(ch);
3535 continue;
3536 }
3537
3538 match ch {
3539 '\\' if !in_single => escaped = true,
3540 '\'' if !in_double && !in_backtick => in_single = !in_single,
3541 '"' if !in_single && !in_backtick => in_double = !in_double,
3542 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3543 '(' if !in_single
3544 && !in_double
3545 && !in_backtick
3546 && (paren_depth > 0 || prev_char == Some('$')) =>
3547 {
3548 paren_depth += 1
3549 }
3550 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3551 paren_depth -= 1
3552 }
3553 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3554 '}' if !in_single && !in_double && !in_backtick => {
3555 brace_depth -= 1;
3556 if brace_depth == 0 {
3557 break;
3558 }
3559 }
3560 _ => {}
3561 }
3562
3563 prev_char = Some(ch);
3564 }
3565 }
3566
3567 fn consume_brace_word_body(&mut self, word: &mut String) {
3568 let mut brace_depth = 1usize;
3569 let mut paren_depth = 0usize;
3570 let mut escaped = false;
3571 let mut in_single = false;
3572 let mut in_double = false;
3573 let mut in_backtick = false;
3574 let mut prev_char = None;
3575
3576 while let Some(ch) = self.peek_char() {
3577 word.push(ch);
3578 self.advance();
3579
3580 if escaped {
3581 escaped = false;
3582 prev_char = Some(ch);
3583 continue;
3584 }
3585
3586 match ch {
3587 '\\' if !in_single => escaped = true,
3588 '\'' if !in_double && !in_backtick => in_single = !in_single,
3589 '"' if !in_single && !in_backtick => in_double = !in_double,
3590 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3591 '(' if !in_single
3592 && !in_double
3593 && !in_backtick
3594 && (paren_depth > 0 || prev_char == Some('$')) =>
3595 {
3596 paren_depth += 1
3597 }
3598 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3599 paren_depth -= 1
3600 }
3601 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3602 '}' if !in_single && !in_double && !in_backtick => {
3603 brace_depth -= 1;
3604 if brace_depth == 0 {
3605 break;
3606 }
3607 }
3608 _ => {}
3609 }
3610
3611 prev_char = Some(ch);
3612 }
3613 }
3614
3615 fn looks_like_mid_word_brace_segment(&self) -> bool {
3618 const MAX_LOOKAHEAD: usize = 10_000;
3619
3620 let mut chars = self.lookahead_chars();
3621 if chars.next() != Some('{') {
3622 return false;
3623 }
3624
3625 let mut brace_depth = 1;
3626 let mut paren_depth = 0usize;
3627 let mut escaped = false;
3628 let mut in_single = false;
3629 let mut in_double = false;
3630 let mut in_backtick = false;
3631 let mut prev_char = None;
3632 let mut scanned = 0usize;
3633
3634 for ch in chars {
3635 scanned += 1;
3636 if scanned > MAX_LOOKAHEAD {
3637 return false;
3638 }
3639
3640 if !in_single
3641 && !in_double
3642 && !in_backtick
3643 && !escaped
3644 && brace_depth == 1
3645 && paren_depth == 0
3646 && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3647 {
3648 return false;
3649 }
3650
3651 if escaped {
3652 escaped = false;
3653 prev_char = Some(ch);
3654 continue;
3655 }
3656
3657 match ch {
3658 '\\' => escaped = true,
3659 '\'' if !in_double && !in_backtick => in_single = !in_single,
3660 '"' if !in_single && !in_backtick => in_double = !in_double,
3661 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3662 '(' if !in_single
3663 && !in_double
3664 && !in_backtick
3665 && (paren_depth > 0 || prev_char == Some('$')) =>
3666 {
3667 paren_depth += 1
3668 }
3669 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3670 paren_depth -= 1
3671 }
3672 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3673 '}' if !in_single && !in_double && !in_backtick => {
3674 brace_depth -= 1;
3675 if brace_depth == 0 {
3676 return true;
3677 }
3678 }
3679 _ => {}
3680 }
3681
3682 prev_char = Some(ch);
3683 }
3684
3685 false
3686 }
3687
3688 fn is_brace_group_start(&self) -> bool {
3690 let mut chars = self.lookahead_chars();
3691 if chars.next() != Some('{') {
3693 return false;
3694 }
3695 matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3697 }
3698
3699 fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3702 const MAX_LOOKAHEAD: usize = 10_000;
3703
3704 let mut chars = self.lookahead_chars();
3705 let mut depth = 1;
3706 let mut has_comma = false;
3707 let mut has_dot_dot = false;
3708 let mut prev_char = None;
3709 let mut scanned = 0usize;
3710
3711 for ch in chars.by_ref() {
3712 scanned += 1;
3713 if scanned > MAX_LOOKAHEAD {
3714 return false;
3715 }
3716 match ch {
3717 '{' => depth += 1,
3718 '}' => {
3719 depth -= 1;
3720 if depth == 0 {
3721 return has_comma || has_dot_dot;
3722 }
3723 }
3724 ',' if depth == 1 => has_comma = true,
3725 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3726 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3727 _ => {}
3728 }
3729 prev_char = Some(ch);
3730 }
3731
3732 false
3733 }
3734
3735 fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3736 let mut chars = self.lookahead_chars();
3737 if chars.next() != Some('{') {
3738 return false;
3739 }
3740 chars.next() == Some(')')
3741 }
3742
3743 fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3745 let mut word = String::with_capacity(16);
3746
3747 if let Some('{') = self.peek_char() {
3748 word.push('{');
3749 self.advance();
3750 } else {
3751 return None;
3752 }
3753
3754 self.consume_brace_word_body(&mut word);
3755
3756 while let Some(ch) = self.peek_char() {
3757 if Self::is_word_char(ch) {
3758 if self.reinject_buf.is_empty() {
3759 let chunk = self.cursor.eat_while(Self::is_word_char);
3760 word.push_str(chunk);
3761 self.advance_scanned_source_bytes(chunk.len());
3762 } else {
3763 word.push(ch);
3764 self.advance();
3765 }
3766 } else {
3767 break;
3768 }
3769 }
3770
3771 Some(LexedToken::owned_word(TokenKind::Word, word))
3772 }
3773
3774 fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3776 let mut word = String::with_capacity(16);
3777
3778 if let Some('{') = self.peek_char() {
3780 word.push('{');
3781 self.advance();
3782 } else {
3783 return None;
3784 }
3785
3786 self.consume_brace_word_body(&mut word);
3788
3789 while let Some(ch) = self.peek_char() {
3791 if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3792 if ch == '{' {
3793 word.push(ch);
3795 self.advance();
3796 self.consume_brace_word_body(&mut word);
3797 } else {
3798 word.push(ch);
3799 self.advance();
3800 }
3801 } else {
3802 break;
3803 }
3804 }
3805
3806 Some(LexedToken::owned_word(TokenKind::Word, word))
3807 }
3808
3809 fn looks_like_assoc_assign(&self) -> bool {
3813 let mut chars = self.lookahead_chars();
3814 if chars.next() != Some('(') {
3816 return false;
3817 }
3818 for ch in chars {
3820 match ch {
3821 ' ' | '\t' => continue,
3822 '[' => return true,
3823 _ => return false,
3824 }
3825 }
3826 false
3827 }
3828
3829 fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3830 text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3831 }
3832
3833 fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3834 word.segments().any(|segment| {
3835 matches!(
3836 segment.kind(),
3837 LexedWordSegmentKind::SingleQuoted
3838 | LexedWordSegmentKind::DollarSingleQuoted
3839 | LexedWordSegmentKind::DoubleQuoted
3840 | LexedWordSegmentKind::DollarDoubleQuoted
3841 )
3842 }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3843 }
3844
3845 fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3846 text.contains(['*', '?'])
3847 || text.ends_with('}') && text.contains("${")
3848 || text.ends_with(']')
3849 && text
3850 .rfind('[')
3851 .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3852 }
3853
3854 fn is_word_char(ch: char) -> bool {
3855 !matches!(
3856 ch,
3857 ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3858 )
3859 }
3860
3861 const fn is_ascii_word_byte(byte: u8) -> bool {
3862 !matches!(
3863 byte,
3864 b' ' | b'\t'
3865 | b'\n'
3866 | b';'
3867 | b'|'
3868 | b'&'
3869 | b'>'
3870 | b'<'
3871 | b'('
3872 | b')'
3873 | b'{'
3874 | b'}'
3875 | b'\''
3876 | b'"'
3877 )
3878 }
3879
3880 const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3881 Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3882 }
3883
3884 fn is_plain_word_char(ch: char) -> bool {
3885 Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3886 }
3887
3888 pub(super) fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3890 let mut content = String::with_capacity(64);
3891 let mut current_line = String::with_capacity(64);
3892
3893 let mut rest_of_line = String::with_capacity(32);
3900 let rest_of_line_start = self.current_position();
3901 let mut in_double_quote = false;
3902 let mut in_single_quote = false;
3903 let mut in_comment = false;
3904 let mut saw_non_whitespace_tail = false;
3905 let mut consecutive_backslashes = 0usize;
3906 let mut previous_tail_char = None;
3907 while let Some(ch) = self.peek_char() {
3908 self.advance();
3909 if in_comment {
3910 if ch == '\n' {
3911 break;
3912 }
3913 rest_of_line.push(ch);
3914 previous_tail_char = Some(ch);
3915 continue;
3916 }
3917 if ch == '#'
3918 && !in_single_quote
3919 && !in_double_quote
3920 && self.comments_enabled()
3921 && heredoc_tail_hash_starts_comment(previous_tail_char)
3922 {
3923 in_comment = true;
3924 rest_of_line.push(ch);
3925 previous_tail_char = Some(ch);
3926 consecutive_backslashes = 0;
3927 continue;
3928 }
3929 let backslash_continues_line = ch == '\\'
3930 && !in_single_quote
3931 && self.peek_char() == Some('\n')
3932 && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3933 && consecutive_backslashes.is_multiple_of(2);
3934 if backslash_continues_line {
3935 rest_of_line.push(ch);
3936 rest_of_line.push('\n');
3937 self.advance();
3938 consecutive_backslashes = 0;
3939 continue;
3940 }
3941 if ch == '\n' && !in_double_quote && !in_single_quote {
3942 break;
3943 }
3944 if ch == '"' && !in_single_quote {
3945 in_double_quote = !in_double_quote;
3946 } else if ch == '\'' && !in_double_quote {
3947 in_single_quote = !in_single_quote;
3948 } else if ch == '\\' && in_double_quote {
3949 rest_of_line.push(ch);
3951 if let Some(next) = self.peek_char() {
3952 rest_of_line.push(next);
3953 self.advance();
3954 }
3955 continue;
3956 }
3957 rest_of_line.push(ch);
3958 if !ch.is_whitespace() {
3959 saw_non_whitespace_tail = true;
3960 }
3961 if ch == '\\' && !in_single_quote {
3962 consecutive_backslashes += 1;
3963 } else {
3964 consecutive_backslashes = 0;
3965 }
3966 previous_tail_char = Some(ch);
3967 }
3968
3969 self.sync_offset_to_cursor();
3973 let content_start = self.current_position();
3974 let mut current_line_start = content_start;
3975 let content_end;
3976
3977 loop {
3979 if self.reinject_buf.is_empty() {
3980 self.sync_offset_to_cursor();
3986 let rest = self.cursor.rest();
3987 if rest.is_empty() {
3988 content_end = self.current_position();
3989 break;
3990 }
3991
3992 let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3993 let line = &rest[..line_len];
3994 let has_newline = line_len < rest.len();
3995
3996 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3997 content_end = current_line_start;
3998 self.consume_source_bytes(line_len);
3999 if has_newline {
4000 self.consume_ascii_chars(1);
4001 }
4002 break;
4003 }
4004
4005 content.push_str(line);
4006 self.consume_source_bytes(line_len);
4007
4008 if has_newline {
4009 self.consume_ascii_chars(1);
4010 content.push('\n');
4011 current_line_start = self.current_position();
4012 continue;
4013 }
4014
4015 content_end = self.current_position();
4016 break;
4017 }
4018
4019 match self.peek_char() {
4020 Some('\n') => {
4021 self.advance();
4022 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4024 content_end = current_line_start;
4025 break;
4026 }
4027 content.push_str(¤t_line);
4028 content.push('\n');
4029 current_line.clear();
4030 current_line_start = self.current_position();
4031 }
4032 Some(ch) => {
4033 current_line.push(ch);
4034 self.advance();
4035 }
4036 None => {
4037 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4039 content_end = current_line_start;
4040 break;
4041 }
4042 if !current_line.is_empty() {
4043 content.push_str(¤t_line);
4044 }
4045 content_end = self.current_position();
4046 break;
4047 }
4048 }
4049 }
4050
4051 let post_heredoc_offset = self.offset;
4056 self.offset = rest_of_line_start.offset;
4057 for ch in rest_of_line.chars() {
4058 self.reinject_buf.push_back(ch);
4059 }
4060 self.reinject_buf.push_back('\n');
4061 self.reinject_resume_offset = Some(post_heredoc_offset);
4062
4063 HeredocRead {
4064 content,
4065 content_span: Span::from_positions(content_start, content_end),
4066 }
4067 }
4068
4069 fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4070 let mut chars = self.cursor.rest().chars();
4071 if chars.next() != Some('\n') {
4072 return false;
4073 }
4074
4075 for ch in chars {
4076 if matches!(ch, ' ' | '\t') {
4077 continue;
4078 }
4079 if ch == '\n' {
4080 return false;
4081 }
4082 return matches!(ch, '|' | '&' | ';' | '<' | '>')
4083 || (ch == '#' && self.comments_enabled());
4084 }
4085
4086 false
4087 }
4088}
4089
/// Tests whether `line` terminates a here-document with the given
/// `delimiter`.
///
/// With `strip_tabs` set, leading tab characters are removed from the line
/// first. The line matches when it is the delimiter followed only by spaces
/// or tabs (or by nothing at all).
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    // An exact match leaves an empty remainder, for which `all` is true.
    candidate
        .strip_prefix(delimiter)
        .is_some_and(|trailing| trailing.chars().all(|ch| matches!(ch, ' ' | '\t')))
}
4107
/// Decides whether a `#` in a heredoc tail begins a comment, based on the
/// character before it.
///
/// A `#` starts a comment at the very start of the tail (`None`), after
/// whitespace, or after one of `; | & < > )`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(prev) => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
    }
}
4113
/// Decodes the character that starts at byte `index` of `input`.
///
/// Returns the character together with the byte index just past it, or
/// `None` when `index` is at or beyond the end of `input` or does not fall
/// on a character boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    let tail = input.get(index..)?;
    tail.chars()
        .next()
        .map(|ch| (ch, index + ch.len_utf8()))
}
4118
4119fn line_has_unclosed_double_paren(prefix: &str) -> bool {
4120 let mut index = 0usize;
4121 let mut depth = 0usize;
4122 let mut in_single = false;
4123 let mut in_double = false;
4124 let mut in_backtick = false;
4125 let mut escaped = false;
4126
4127 while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
4128 let was_escaped = escaped;
4129 if ch == '\\' && !in_single {
4130 escaped = !escaped;
4131 index = next_index;
4132 continue;
4133 }
4134 escaped = false;
4135
4136 match ch {
4137 '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
4138 '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
4139 '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
4140 '(' if !in_single
4141 && !in_double
4142 && !in_backtick
4143 && !was_escaped
4144 && prefix[next_index..].starts_with('(') =>
4145 {
4146 depth += 1;
4147 index = next_index + '('.len_utf8();
4148 continue;
4149 }
4150 ')' if !in_single
4151 && !in_double
4152 && !in_backtick
4153 && !was_escaped
4154 && prefix[next_index..].starts_with(')') =>
4155 {
4156 depth = depth.saturating_sub(1);
4157 index = next_index + ')'.len_utf8();
4158 continue;
4159 }
4160 _ => {}
4161 }
4162
4163 index = next_index;
4164 }
4165
4166 depth > 0
4167}
4168
4169fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4170 let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4171 let prefix = &input[line_start..index];
4172 line_has_unclosed_double_paren(prefix)
4173}
4174
4175fn hash_starts_comment(input: &str, index: usize) -> bool {
4176 if inside_unclosed_double_paren_on_line(input, index) {
4177 return false;
4178 }
4179
4180 let next = &input[index + '#'.len_utf8()..];
4181 input[..index]
4182 .chars()
4183 .next_back()
4184 .is_none_or(|prev| match prev {
4185 '(' => {
4186 let whitespace_index = next.find(char::is_whitespace);
4187 let close_index = next.find(')');
4188
4189 match (whitespace_index, close_index) {
4190 (Some(whitespace), Some(close)) => whitespace < close,
4191 (Some(_), None) | (None, None) => true,
4192 (None, Some(_)) => false,
4193 }
4194 }
4195 _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4196 })
4197}
4198
/// Returns `true` when `ch` ends a heredoc delimiter word.
///
/// Nothing terminates the word while inside quotes or right after a
/// backslash; otherwise whitespace and `| & ; < > ( )` do.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }

    match ch {
        '|' | '&' | ';' | '<' | '>' | '(' | ')' => true,
        other => other.is_whitespace(),
    }
}
4210
4211fn scan_double_quoted_command_substitution_segment(
4212 input: &str,
4213 mut index: usize,
4214 subst_depth: usize,
4215) -> Option<usize> {
4216 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4217 match ch {
4218 '"' => return Some(next_index),
4219 '\\' => {
4220 index = next_index;
4221 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4222 index = escaped_next;
4223 }
4224 }
4225 '$' if input[next_index..].starts_with('{') => {
4226 let consumed = scan_command_subst_parameter_expansion_len(
4227 &input[next_index + '{'.len_utf8()..],
4228 subst_depth,
4229 0,
4230 )?;
4231 index = next_index + '{'.len_utf8() + consumed;
4232 }
4233 '$' if input[next_index..].starts_with('(')
4234 && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4235 {
4236 let consumed = scan_command_substitution_body_len_inner(
4237 &input[next_index + '('.len_utf8()..],
4238 subst_depth + 1,
4239 )?;
4240 index = next_index + '('.len_utf8() + consumed;
4241 }
4242 _ => index = next_index,
4243 }
4244 }
4245
4246 None
4247}
4248
/// Measures a `${…}` parameter expansion found while scanning a command
/// substitution.
///
/// `input` starts just after the opening `${`. On success the returned
/// length covers everything up to and including the closing `}`, so callers
/// add it to the position right after the `{`.
///
/// `subst_depth` is forwarded to nested command-substitution scans.
/// `parameter_depth` counts nested `${` levels; once it reaches
/// `MAX_PARAMETER_EXPANSION_SCAN_DEPTH` the scan is handed to the
/// brace-counting variant instead of recursing further.
///
/// Returns `None` when no terminating `}` is found.
fn scan_command_subst_parameter_expansion_len(
    input: &str,
    subst_depth: usize,
    parameter_depth: usize,
) -> Option<usize> {
    if parameter_depth >= MAX_PARAMETER_EXPANSION_SCAN_DEPTH {
        return scan_command_subst_parameter_expansion_len_balanced(input, subst_depth);
    }

    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True right after an unquoted `$`, so a following `'` opens `$'…'`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Backslashes toggle the escape flag everywhere except inside plain
        // single quotes.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // Nested `${…}`: skip it as a unit when it scans successfully;
            // otherwise fall through and treat the `$` as ordinary text.
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                    parameter_depth + 1,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // Nested `$(…)` (but not `$((`): skip the whole substitution.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // Unquoted `<(…)` / `>(…)` is skipped with the same body scanner.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // The first `}` outside every quoting context ends the expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4357
/// Non-recursive fallback for `scan_command_subst_parameter_expansion_len`.
///
/// Instead of recursing for each nested `${`, this variant keeps a brace
/// counter: every `${` outside single quotes, `$'…'` and backticks raises
/// it, and every `}` outside all quoting contexts lowers it. The counter
/// starts at 1 for the expansion whose `${` precedes `input`.
///
/// Returns the length up to and including the `}` that brings the counter to
/// zero, or `None` when the input ends first.
fn scan_command_subst_parameter_expansion_len_balanced(
    input: &str,
    subst_depth: usize,
) -> Option<usize> {
    let mut index = 0usize;
    let mut brace_depth = 1usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // True right after an unquoted `$`, so a following `'` opens `$'…'`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Backslashes toggle the escape flag everywhere except inside plain
        // single quotes.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // Nested `${`: count it and step past both characters.
            if input[next_index..].starts_with('{') {
                brace_depth = brace_depth.saturating_add(1);
                index = next_index + '{'.len_utf8();
                ansi_c_quote_pending = false;
                continue;
            }

            // Nested `$(…)` (but not `$((`): skip the whole substitution.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // Unquoted `<(…)` / `>(…)` is skipped with the same body scanner.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // An unquoted `}` closes one level; the scan ends at level zero.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                brace_depth = brace_depth.saturating_sub(1);
                if brace_depth == 0 {
                    return Some(next_index);
                }
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4460
4461fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4462 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4463 if !matches!(ch, ' ' | '\t') {
4464 break;
4465 }
4466 index = next_index;
4467 }
4468
4469 let start = index;
4470 let mut cooked = String::new();
4471 let mut in_single = false;
4472 let mut in_double = false;
4473 let mut escaped = false;
4474
4475 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4476 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4477 break;
4478 }
4479
4480 index = next_index;
4481 if escaped {
4482 cooked.push(ch);
4483 escaped = false;
4484 continue;
4485 }
4486
4487 match ch {
4488 '\\' if !in_single => escaped = true,
4489 '\'' if !in_double => in_single = !in_single,
4490 '"' if !in_single => in_double = !in_double,
4491 _ => cooked.push(ch),
4492 }
4493 }
4494
4495 (index > start).then_some((index, cooked))
4496}
4497
4498fn skip_command_subst_pending_heredoc(
4499 input: &str,
4500 mut index: usize,
4501 delimiter: &str,
4502 strip_tabs: bool,
4503) -> usize {
4504 while index <= input.len() {
4505 let rest = &input[index..];
4506 let line_len = rest.find('\n').unwrap_or(rest.len());
4507 let line = &rest[..line_len];
4508 let has_newline = line_len < rest.len();
4509
4510 index += line_len;
4511 if has_newline {
4512 index += '\n'.len_utf8();
4513 }
4514
4515 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4516 return index;
4517 }
4518 }
4519
4520 index
4521}
4522
4523fn scan_command_subst_ansi_c_single_quoted_segment(
4524 input: &str,
4525 quote_index: usize,
4526) -> Option<usize> {
4527 let mut index = quote_index + '\''.len_utf8();
4528
4529 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4530 index = next_index;
4531 if ch == '\\' {
4532 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4533 index = escaped_next;
4534 }
4535 continue;
4536 }
4537
4538 if ch == '\'' {
4539 return Some(index);
4540 }
4541 }
4542
4543 None
4544}
4545
4546fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4547 let mut index = start;
4548
4549 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4550 index = next_index;
4551 if ch == '\\' {
4552 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4553 index = escaped_next;
4554 }
4555 continue;
4556 }
4557
4558 if ch == '`' {
4559 return Some(index);
4560 }
4561 }
4562
4563 None
4564}
4565
4566fn flush_scanned_command_subst_keyword(
4567 current_word: &mut String,
4568 pending_case_headers: &mut usize,
4569 case_clause_depths: &mut SmallVec<[usize; 4]>,
4570 depth: usize,
4571 word_started_at_command_start: &mut bool,
4572) {
4573 if current_word.is_empty() {
4574 *word_started_at_command_start = false;
4575 return;
4576 }
4577
4578 match current_word.as_str() {
4579 "case" if *word_started_at_command_start => *pending_case_headers += 1,
4580 "in" if *pending_case_headers > 0 => {
4581 *pending_case_headers -= 1;
4582 case_clause_depths.push(depth);
4583 }
4584 "esac" if *word_started_at_command_start => {
4585 case_clause_depths.pop();
4586 }
4587 _ => {}
4588 }
4589
4590 current_word.clear();
4591 *word_started_at_command_start = false;
4592}
4593
/// Measures the body of a `$(…)` command substitution.
///
/// `input` starts just after the opening `(`. The scan tracks parenthesis
/// depth, quoting, comments, pending heredocs and `case … in … esac`
/// clauses so that a `)` belonging to a case pattern is not mistaken for the
/// end of the substitution.
///
/// Returns the byte length up to and including the closing `)`, or `None`
/// when the input ends first, a nested scan fails, or `subst_depth` has
/// reached `DEFAULT_MAX_SUBST_DEPTH`.
pub(super) fn scan_command_substitution_body_len_inner(
    input: &str,
    subst_depth: usize,
) -> Option<usize> {
    // Refuse to nest deeper than the configured limit.
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    // Parenthesis nesting; 1 accounts for the `(` that precedes `input`.
    let mut depth = 1;
    // Heredocs announced on the current line as (delimiter, strip_tabs);
    // their bodies are skipped at the next newline.
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    // `case` keywords seen whose `in` has not been seen yet.
    let mut pending_case_headers = 0usize;
    // Parenthesis depth at which each open case clause was entered.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    // Bare word being accumulated for keyword detection.
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Comment: skip to the end of the line, then skip the bodies of
            // any heredocs announced on that line.
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // Inside a case clause opened at this depth, `)` ends a
                // pattern rather than closing a parenthesis.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                // The `)` matching the substitution's own `(` ends the scan.
                if depth == 0 {
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                // A quoted string either serves as the awaited redirection
                // target or counts as command text.
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                // Plain single quotes have no escapes: run to the next `'`.
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // `$'…'` quoting; `next_index` is the position of the quote.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Backslash: skip the escaped character as well, if any.
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '>' => {
                // An all-digit word at command start directly before the
                // operator is a file-descriptor prefix, not a command name.
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            // `<<` family: arithmetic shift, here-string, or heredoc.
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // Inside an unclosed `((` on this line, `<<` is not a
                // heredoc operator: skip both characters.
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<` here-string: the next word is its target.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                // `<<` or `<<-`: remember the delimiter so the body can be
                // skipped once the current line ends.
                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            // End of line: skip the bodies of heredocs announced on it.
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            // `${…}` parameter expansion, skipped as a unit.
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                    0,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(…)` (but not `$((`), scanned recursively one level
            // deeper.
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            _ => {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    // Word characters accumulate so `case`/`in`/`esac` can be
                    // recognised when the word is flushed.
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        ' ' | '\t' => {}
                        // Command separators start a new command.
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    None
}
4951
4952pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
4953 scan_command_substitution_body_len_inner(input, 0)
4954}
4955
4956#[cfg(test)]
4957mod tests {
4958 use super::*;
4959
4960 fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4961 match token.kind {
4962 kind if kind.is_word_like() => token.word_string(),
4963 TokenKind::Comment => token
4964 .span
4965 .slice(source)
4966 .strip_prefix('#')
4967 .map(str::to_string),
4968 TokenKind::Error => token
4969 .error_kind()
4970 .map(LexerErrorKind::message)
4971 .map(str::to_string),
4972 _ => None,
4973 }
4974 }
4975
4976 fn assert_next_token(
4977 lexer: &mut Lexer<'_>,
4978 expected_kind: TokenKind,
4979 expected_text: Option<&str>,
4980 ) {
4981 let token = lexer.next_lexed_token().unwrap();
4982 assert_eq!(token.kind, expected_kind);
4983 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4984 }
4985
4986 fn assert_next_token_with_comments(
4987 lexer: &mut Lexer<'_>,
4988 expected_kind: TokenKind,
4989 expected_text: Option<&str>,
4990 ) {
4991 let token = lexer.next_lexed_token_with_comments().unwrap();
4992 assert_eq!(token.kind, expected_kind);
4993 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4994 }
4995
4996 fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4997 let mut lexer = Lexer::new(input);
4998
4999 while let Some(token) = lexer.next_lexed_token() {
5000 if token.kind == TokenKind::Newline {
5001 continue;
5002 }
5003
5004 assert_eq!(
5005 token.span.start.line, token.span.end.line,
5006 "token should stay on one line: {:?}",
5007 token
5008 );
5009 }
5010 }
5011
/// Three space-separated bare words lex as three `Word` tokens, then the stream ends.
#[test]
fn test_simple_words() {
    let mut lexer = Lexer::new("echo hello world");

    for word in ["echo", "hello", "world"] {
        assert_next_token(&mut lexer, TokenKind::Word, Some(word));
    }

    assert!(lexer.next_lexed_token().is_none());
}
5021
/// A single-quoted argument lexes as one `LiteralWord` whose text excludes the quotes.
#[test]
fn test_single_quoted_string() {
    let mut lexer = Lexer::new("echo 'hello world'");

    let expected = [
        (TokenKind::Word, "echo"),
        (TokenKind::LiteralWord, "hello world"),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, Some(text));
    }

    assert!(lexer.next_lexed_token().is_none());
}
5031
/// A double-quoted argument lexes as one `QuotedWord` whose text excludes the quotes.
#[test]
fn test_double_quoted_string() {
    let mut lexer = Lexer::new("echo \"hello world\"");

    let expected = [
        (TokenKind::Word, "echo"),
        (TokenKind::QuotedWord, "hello world"),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, Some(text));
    }

    assert!(lexer.next_lexed_token().is_none());
}
5040
/// A `}` inside double quotes must not terminate the surrounding brace
/// expansion: the whole `{"}",a}` stays one `Word`.
#[test]
fn test_brace_expansion_token_ignores_quoted_closers() {
    let mut lexer = Lexer::new("echo {\"}\",a}\n");

    let expected = [
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some(r#"{"}",a}"#)),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }

    assert!(lexer.next_lexed_token().is_none());
}
5050
/// A backslash just before the closing single quote of a brace member does
/// not extend the quote: `{'a\',b}` is one `Word` and `next` is lexed separately.
#[test]
fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
    let mut lexer = Lexer::new("echo {'a\\',b} next\n");

    let expected = [
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some(r#"{'a\',b}"#)),
        (TokenKind::Word, Some("next")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }

    assert!(lexer.next_lexed_token().is_none());
}
5061
/// A double-quoted `$bar` yields a single `DoubleQuoted` segment whose span
/// slices back to `$bar` in the original source.
#[test]
fn test_double_quoted_expansion_token_keeps_source_backing() {
    let source = r#""$bar""#;
    let inner = "$bar";
    let mut lexer = Lexer::new(source);

    let quoted = lexer.next_lexed_token().unwrap();
    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(inner));

    let word = quoted.word().unwrap();
    let only_segment = word.single_segment().unwrap();
    assert_eq!(only_segment.kind(), LexedWordSegmentKind::DoubleQuoted);

    let segment_span = only_segment.span().unwrap();
    assert_eq!(segment_span.slice(source), inner);
}
5076
/// Inner quotes and a pipeline inside `$(...)` do not end the enclosing
/// double-quoted word: the token text is the full substitution.
#[test]
fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
    let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
    let expected_text = r#"$(echo "$line" | cut -d' ' -f2-)"#;

    let mut lexer = Lexer::new(source);
    let quoted = lexer.next_lexed_token().unwrap();

    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(expected_text));
}
5089
/// A quoted `${@}` followed by a pipeline inside `$(...)` stays part of the
/// single enclosing `QuotedWord`.
#[test]
fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
    let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
    let expected_text = r#"$(echo "${@}" | tr -d '[:space:]')"#;

    let mut lexer = Lexer::new(source);
    let quoted = lexer.next_lexed_token().unwrap();

    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(expected_text));
}
5102
/// Four levels of quoted `$(...)` nesting around `${name}` still lex as one
/// `QuotedWord` carrying the complete text.
#[test]
fn test_deep_command_substitution_preserves_simple_parameter_expansion() {
    let source = r#""$(echo "$(echo "$(echo "$(echo "${name}")")")")""#;
    let expected_text = r#"$(echo "$(echo "$(echo "$(echo "${name}")")")")"#;

    let mut lexer = Lexer::new(source);
    let quoted = lexer.next_lexed_token().unwrap();

    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(expected_text));
}
5115
/// A `)` buried inside deeply nested `${..:-..}` operands must not close the
/// surrounding command substitution.
#[test]
fn test_command_substitution_preserves_deep_parameter_operand_paren() {
    let source = r#""$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")""#;
    let expected_text = r#"$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")"#;

    let mut lexer = Lexer::new(source);
    let quoted = lexer.next_lexed_token().unwrap();

    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(expected_text));
}
5128
/// A word made of plain, double-quoted and single-quoted parts keeps one
/// segment per part, joins to `foobarbaz`, and the first segment's span
/// slices back to `foo`.
#[test]
fn test_mixed_word_keeps_segment_kinds() {
    let source = r#"foo"bar"'baz'"#;
    let mut lexer = Lexer::new(source);

    let token = lexer.next_lexed_token().unwrap();
    assert_eq!(token.kind, TokenKind::Word);

    let word = token.word().unwrap();

    let mut observed = Vec::new();
    for segment in word.segments() {
        observed.push((segment.kind(), segment.as_str().to_string()));
    }

    let expected = vec![
        (LexedWordSegmentKind::Plain, String::from("foo")),
        (LexedWordSegmentKind::DoubleQuoted, String::from("bar")),
        (LexedWordSegmentKind::SingleQuoted, String::from("baz")),
    ];
    assert_eq!(observed, expected);
    assert_eq!(word.joined_text(), "foobarbaz");

    let first_span = word
        .segments()
        .next()
        .and_then(LexedWordSegment::span)
        .unwrap();
    assert_eq!(first_span.slice(source), "foo");
}
5161
/// The scan covers a `<<-EOF` heredoc piped into another command and stops
/// on the closing `)` rather than on the trailing quote.
#[test]
fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
    let input = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";

    let body_len = scan_command_substitution_body_len(input).expect("expected match");
    let scanned = &input[..body_len];

    assert!(scanned.contains("field, direction"));
    assert!(scanned.ends_with(')'));
}
5172
/// A comment that starts right after `;` may contain `)` without ending the
/// substitution; the command on the following line is still part of the body.
#[test]
fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
    let input = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";

    let body_len = scan_command_substitution_body_len(input).expect("expected match");
    let scanned = &input[..body_len];

    assert!(scanned.contains("printf '%s' y"));
    assert!(scanned.ends_with(')'));
}
5183
/// A `# ...` comment directly after a grouping `(` may contain `)` without
/// closing the group or the substitution.
#[test]
fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
    let input = " (# comment with )\nprintf %s 1,2\n) )\"";

    let body_len = scan_command_substitution_body_len(input).expect("expected match");
    let scanned = &input[..body_len];

    assert!(scanned.contains("printf %s 1,2"));
    assert!(scanned.ends_with(')'));
}
5194
/// `<<EOF|tr` (no space between delimiter and pipe) is still scanned as a
/// heredoc, so its body is included up to the closing `)`.
#[test]
fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
    let input = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";

    let body_len = scan_command_substitution_body_len(input).expect("expected match");
    let scanned = &input[..body_len];

    assert!(scanned.contains("field, direction"));
    assert!(scanned.ends_with(')'));
}
5205
/// A `)` inside a `${x//foo/)}` replacement does not end the substitution.
#[test]
fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
    let input = "printf %s ${x//foo/)},1)\"";

    let body_len = scan_command_substitution_body_len(input).expect("expected match");
    let scanned = &input[..body_len];

    assert!(scanned.contains("${x//foo/)},1"));
    assert!(scanned.ends_with(')'));
}
5216
/// A comment right after a case-pattern `)` may mention `esac` and `)`
/// without disturbing the scan of the rest of the `case` body.
#[test]
fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
    let input = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";

    let body_len = scan_command_substitution_body_len(input).expect("expected match");
    let scanned = &input[..body_len];

    assert!(scanned.contains("printf %s 1,2"));
    assert!(scanned.ends_with(')'));
}
5227
/// The `#` of `(#b)` inside `[[ ... ]]` is not reported as a comment start.
#[test]
fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
    let source = "[[ \"$buf\" == (#b)(*) ]]";
    let hash_at = source.find('#').expect("expected hash");

    let is_comment = hash_starts_comment(source, hash_at);
    assert!(!is_comment);
}
5235
/// `(#comment ...` is reported as a comment start even with no space after `#`.
#[test]
fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
    let source = "(#comment with )";
    let hash_at = source.find('#').expect("expected hash");

    let is_comment = hash_starts_comment(source, hash_at);
    assert!(is_comment);
}
5243
/// A `#` that appears after `((` in `(( #c < 256 ))` is not reported as a comment start.
#[test]
fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
    let source = "(( #c < 256 ))";
    let hash_at = source.find('#').expect("expected hash");

    let is_comment = hash_starts_comment(source, hash_at);
    assert!(!is_comment);
}
5251
/// A single-quoted `((` earlier on the line does not stop a later ` #` from
/// being reported as a comment start.
#[test]
fn test_hash_starts_comment_respects_quoted_double_parens() {
    let source = "printf '((' # comment";
    let hash_at = source.find('#').expect("expected hash");

    let is_comment = hash_starts_comment(source, hash_at);
    assert!(is_comment);
}
5259
/// After a quoted `((`, a trailing comment containing `)` is still skipped,
/// so the scan reaches the command on the next line.
#[test]
fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
    let input = "printf '((' # comment with )\nprintf %s 1,2\n)\"";

    let end = scan_command_substitution_body_len(input).expect("expected match");
    let (body, _tail) = input.split_at(end);

    assert!(body.contains("printf %s 1,2"));
    assert!(body.ends_with(')'));
}
5270
/// `(#comment with )` inside a substitution body is skipped as a comment, so
/// the `)` inside it closes nothing.
#[test]
fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
    let input = " (#comment with )\nprintf %s 1,2\n) )\"";

    let end = scan_command_substitution_body_len(input).expect("expected match");
    let (body, _tail) = input.split_at(end);

    assert!(body.contains("printf %s 1,2"));
    assert!(body.ends_with(')'));
}
5281
/// `<<` inside `((x<<2))` must not be mistaken for a heredoc operator; the
/// following command is still part of the scanned body.
#[test]
fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
    let input = "((x<<2))\nprintf %s 1,2\n)\"";

    let end = scan_command_substitution_body_len(input).expect("expected match");
    let (body, _tail) = input.split_at(end);

    assert!(body.contains("printf %s 1,2"));
    assert!(body.ends_with(')'));
}
5292
/// Inside a subshell group, the `)` of a case pattern closes neither the
/// group nor the substitution; the body ends with both real closers, `))`.
#[test]
fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
    let input = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";

    let end = scan_command_substitution_body_len(input).expect("expected match");
    let (body, _tail) = input.split_at(end);

    assert!(body.contains("printf %s 1,2"));
    assert!(body.ends_with("))"));
}
5303
/// `case` and `in` used as ordinary arguments to `echo` do not open a case
/// clause, so the next `)` ends the substitution.
#[test]
fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
    let input = "printf %s 1,2; echo case in)\"";

    let end = scan_command_substitution_body_len(input).expect("expected match");
    let (body, _tail) = input.split_at(end);

    assert!(body.contains("echo case in"));
    assert!(body.ends_with(')'));
}
5314
/// An escaped `'` inside `$'...'` does not end the ANSI-C quote, so the
/// command after it is scanned normally.
#[test]
fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
    let input = "printf %s $'a\\'b'; printf %s 1,2)\"";

    let end = scan_command_substitution_body_len(input).expect("expected match");
    let body = &input[..end];

    for fragment in ["$'a\\'b'", "printf %s 1,2"] {
        assert!(body.contains(fragment));
    }
    assert!(body.ends_with(')'));
}
5326
/// A `)` inside a backtick substitution does not end the enclosing `$(...)`.
#[test]
fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
    let input = "printf %s `echo foo)`; printf %s ok)\"";

    let end = scan_command_substitution_body_len(input).expect("expected match");
    let body = &input[..end];

    for fragment in ["`echo foo)`", "printf %s ok"] {
        assert!(body.contains(fragment));
    }
    assert!(body.ends_with(')'));
}
5338
/// A `}` inside backticks within `${...}` does not end the parameter
/// expansion, so the later `)` in the replacement is not a closer either.
#[test]
fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
    let input = "printf %s ${x/`echo }`/foo)},1)\"";

    let end = scan_command_substitution_body_len(input).expect("expected match");
    let body = &input[..end];

    assert!(body.contains("${x/`echo }`/foo)},1"));
    assert!(body.ends_with(')'));
}
5349
/// A `}` inside `<(...)` within `${...}` does not end the parameter
/// expansion, so the later `)` in the replacement is not a closer either.
#[test]
fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
{
    let input = "printf %s ${x/<(echo })/foo)},1)\"";

    let end = scan_command_substitution_body_len(input).expect("expected match");
    let body = &input[..end];

    assert!(body.contains("${x/<(echo })/foo)},1"));
    assert!(body.ends_with(')'));
}
5361
/// With the closing `)` as the last byte of the input, the scan consumes the
/// whole input even though `case in` appears as plain arguments.
#[test]
fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
    let input = "printf %s 1,2; echo case in)";

    let end = scan_command_substitution_body_len(input).expect("expected match");

    assert_eq!(&input[..end], input);
}
5371
/// With the closing `)` as the last byte, an ANSI-C quote containing an
/// escaped `'` still lets the scan consume the whole input.
#[test]
fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
    let input = "printf %s $'a\\'b'; printf %s 1,2)";

    let end = scan_command_substitution_body_len(input).expect("expected match");

    assert_eq!(&input[..end], input);
}
5381
/// With the closing `)` as the last byte, a `)` inside backticks is skipped
/// and the scan consumes the whole input.
#[test]
fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
    let input = "printf %s `echo foo)`; printf %s ok)";

    let end = scan_command_substitution_body_len(input).expect("expected match");

    assert_eq!(&input[..end], input);
}
5391
/// Quoted arguments on both sides of a pipe are scanned through to the final
/// `)`, which is the last byte of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
    let input = "echo \"$line\" | cut -d' ' -f2-)";

    let end = scan_command_substitution_body_len(input).expect("expected match");

    assert_eq!(&input[..end], input);
}
5401
/// A quoted `${@}` followed by a pipeline is scanned through to the final
/// `)`, which is the last byte of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
    let input = "echo \"${@}\" | tr -d '[:space:]')";

    let end = scan_command_substitution_body_len(input).expect("expected match");

    assert_eq!(&input[..end], input);
}
5411
/// A `<<-EOF` heredoc body is scanned through to the final `)`, which is the
/// last byte of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
    let input = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";

    let end = scan_command_substitution_body_len(input).expect("expected match");

    assert_eq!(&input[..end], input);
}
5421
/// A `<<EOF|tr` heredoc body is scanned through to the final `)`, which is
/// the last byte of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
    let input = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";

    let end = scan_command_substitution_body_len(input).expect("expected match");

    assert_eq!(&input[..end], input);
}
5431
/// A quoted `')'` inside `$(...)` nested in `$(( ... ))` must not close any
/// enclosing construct: the input lexes as `echo` plus one word-like token
/// holding the whole outer substitution.
#[test]
fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
    let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
    let mut lexer = Lexer::new(source);

    let expectations = [
        ("expected first token", "echo"),
        (
            "expected second token",
            "$(echo \"$(( $(printf ')') + 1 ))\")",
        ),
    ];

    for (missing_message, expected_word) in expectations {
        let token = lexer.next_lexed_token().expect(missing_message);
        assert!(token.kind.is_word_like(), "{:?}", token.kind);
        assert_eq!(token.word_string().as_deref(), Some(expected_word));
    }
}
5448
/// Scanning from just past `$(` stops at the substitution's own `)` even
/// though escaped quotes surround it in the enclosing string.
#[test]
fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
    let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
    let opener_at = source.find("$(").expect("expected command substitution");
    let after_opener = &source[opener_at + 2..];

    let body_len = scan_command_substitution_body_len(after_opener).expect("expected match");

    assert_eq!(&after_opener[..body_len], "echo $var | tr A-Z a-z)");
}
5457
/// Scanning the outer substitution includes the nested `$(basename ...)` and
/// both closing parentheses.
#[test]
fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
    let source = "echo $(echo $(basename $filename .fuzz))";
    let opener_at = source.find("$(").expect("expected command substitution");
    let after_opener = &source[opener_at + 2..];

    let body_len = scan_command_substitution_body_len(after_opener).expect("expected match");

    assert_eq!(
        &after_opener[..body_len],
        "echo $(basename $filename .fuzz))"
    );
}
5469
/// A `[[ ... ]] && echo "$(...)"` body is consumed in full: the reported
/// length equals the input length.
#[test]
fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
    let input = "\n [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n )";

    let body_len = scan_command_substitution_body_len(input).expect("expected match");

    assert_eq!(body_len, input.len());
}
5476
/// `'foo'bar` is a `LiteralWord` with a single-quoted segment followed by a
/// plain one; the plain segment's span slices back to `bar`.
#[test]
fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
    let source = "'foo'bar";
    let mut lexer = Lexer::new(source);

    let token = lexer.next_lexed_token().unwrap();
    assert_eq!(token.kind, TokenKind::LiteralWord);

    let word = token.word().unwrap();

    let mut observed = Vec::new();
    for segment in word.segments() {
        observed.push((segment.kind(), segment.as_str().to_string()));
    }

    let expected = vec![
        (LexedWordSegmentKind::SingleQuoted, String::from("foo")),
        (LexedWordSegmentKind::Plain, String::from("bar")),
    ];
    assert_eq!(observed, expected);
    assert_eq!(word.joined_text(), "foobar");

    let continuation_span = word
        .segments()
        .nth(1)
        .and_then(LexedWordSegment::span)
        .unwrap();
    assert_eq!(continuation_span.slice(source), "bar");
}
5508
/// An unquoted `$(printf hi)` is one `Word` with a single plain segment
/// whose text and span both equal the whole source.
#[test]
fn test_unquoted_command_substitution_word_keeps_source_backing() {
    let source = "$(printf hi)";
    let mut lexer = Lexer::new(source);

    let token = lexer.next_lexed_token().unwrap();
    assert_eq!(token.kind, TokenKind::Word);

    let word = token.word().unwrap();
    let only_segment = word.single_segment().unwrap();
    assert_eq!(only_segment.kind(), LexedWordSegmentKind::Plain);
    assert_eq!(only_segment.as_str(), source);

    let segment_span = only_segment.span().unwrap();
    assert_eq!(segment_span.slice(source), source);
}
5523
/// An unquoted nested `${arr[... ${#arr[@]}]}` is one `Word` with a single
/// plain segment whose text and span both equal the whole source.
#[test]
fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
    let source = "${arr[$RANDOM % ${#arr[@]}]}";
    let mut lexer = Lexer::new(source);

    let token = lexer.next_lexed_token().unwrap();
    assert_eq!(token.kind, TokenKind::Word);

    let word = token.word().unwrap();
    let only_segment = word.single_segment().unwrap();
    assert_eq!(only_segment.kind(), LexedWordSegmentKind::Plain);
    assert_eq!(only_segment.as_str(), source);

    let segment_span = only_segment.span().unwrap();
    assert_eq!(segment_span.slice(source), source);
}
5538
/// In `"foo"$(printf hi)` the second segment is plain, reads `$(printf hi)`,
/// and its span slices back to the same text in the source.
#[test]
fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
    let source = "\"foo\"$(printf hi)";
    let substitution = "$(printf hi)";
    let mut lexer = Lexer::new(source);

    let token = lexer.next_lexed_token().unwrap();
    assert_eq!(token.kind, TokenKind::Word);

    let word = token.word().unwrap();
    let second_segment = word.segments().nth(1).unwrap();
    assert_eq!(second_segment.kind(), LexedWordSegmentKind::Plain);
    assert_eq!(second_segment.as_str(), substitution);

    let segment_span = second_segment.span().unwrap();
    assert_eq!(segment_span.slice(source), substitution);
}
5553
/// A double-quoted nested parameter expansion is one `QuotedWord` with a
/// single `DoubleQuoted` segment whose text and span exclude the quotes.
#[test]
fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
    let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
    let inner = "${arr[$RANDOM % ${#arr[@]}]}";
    let mut lexer = Lexer::new(source);

    let token = lexer.next_lexed_token().unwrap();
    assert_eq!(token.kind, TokenKind::QuotedWord);

    let word = token.word().unwrap();
    let only_segment = word.single_segment().unwrap();
    assert_eq!(only_segment.kind(), LexedWordSegmentKind::DoubleQuoted);
    assert_eq!(only_segment.as_str(), inner);

    let segment_span = only_segment.span().unwrap();
    assert_eq!(segment_span.slice(source), inner);
}
5571
/// In `$'\c''` the `\c` escape takes the following `'` as its operand, so the
/// word's text is the single byte `\x07` and the last `'` closes the quote.
#[test]
fn test_ansi_c_control_escape_can_consume_quote() {
    let mut lexer = Lexer::new("echo $'\\c''");

    let expected = [
        (TokenKind::Word, "echo"),
        (TokenKind::LiteralWord, "\x07"),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, Some(text));
    }

    assert!(lexer.next_lexed_token().is_none());
}
5580
/// A replacement expansion whose pattern is a quoted `"` ends on its own
/// line: one `Word`, then the `Newline`, then nothing.
#[test]
fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
    let source = r#"out_line="${out_line//'"'/'\"'}"
"#;
    let mut lexer = Lexer::new(source);

    let expected = [
        (TokenKind::Word, Some(r#"out_line=${out_line//'"'/'"'}"#)),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }

    assert!(lexer.next_lexed_token().is_none());
}
5595
/// After a replacement expansion whose pattern is a quoted `"`, the commands
/// on the following lines are still lexed as separate tokens.
#[test]
fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
    let source = r#"out_line="${out_line//'"'/'\"'}"
echo "Error: Missing python3!"
cat << 'EOF' > "${pywrapper}"
import os
EOF
"#;
    let mut lexer = Lexer::new(source);

    let expected = [
        (TokenKind::Word, Some(r#"out_line=${out_line//'"'/'"'}"#)),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("echo")),
        (TokenKind::QuotedWord, Some("Error: Missing python3!")),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("cat")),
        (TokenKind::HereDoc, None),
        (TokenKind::LiteralWord, Some("EOF")),
        (TokenKind::RedirectOut, None),
        (TokenKind::QuotedWord, Some("${pywrapper}")),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }
}
5625
/// A replacement expansion full of escaped backslashes is one `Word`: its
/// span covers the raw text, `source_slice` reports `None`, and
/// `word_string` yields the text with each escaped backslash pair halved.
#[test]
fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
    let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
    let mut lexer = Lexer::new(source);

    let assignment = lexer.next_lexed_token().unwrap();
    assert_eq!(assignment.kind, TokenKind::Word);

    let raw_text = assignment.span.slice(source);
    assert_eq!(raw_text, "crypt=${crypt//\\\\/\\\\\\\\}");
    assert!(assignment.source_slice(source).is_none());

    let cooked_text = assignment.word_string();
    assert_eq!(cooked_text.as_deref(), Some("crypt=${crypt//\\/\\\\}"));

    assert_next_token(&mut lexer, TokenKind::Newline, None);
    assert!(lexer.next_lexed_token().is_none());
}
5642
/// A literal `{` inside a `${response#*{...}` trim pattern does not open a
/// brace construct: the tokens after the assignment (`fi`, `}`, newlines)
/// are still produced individually.
#[test]
fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
    let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n if true; then\n txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n fi\n}\n";
    let mut lexer = Lexer::new(source);

    let expected = [
        (
            TokenKind::Word,
            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
        ),
        (TokenKind::Newline, None),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("f")),
        (TokenKind::LeftParen, None),
        (TokenKind::RightParen, None),
        (TokenKind::LeftBrace, None),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("if")),
        (TokenKind::Word, Some("true")),
        (TokenKind::Semicolon, None),
        (TokenKind::Word, Some("then")),
        (TokenKind::Newline, None),
        (
            TokenKind::Word,
            Some(
                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
            ),
        ),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("fi")),
        (TokenKind::Newline, None),
        (TokenKind::RightBrace, None),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }

    assert!(lexer.next_lexed_token().is_none());
}
5679
/// A case pattern consisting of a lone `{` is a plain `Word`; the arms that
/// follow it are lexed token by token through `esac`.
#[test]
fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
    let source = "case \"$word\" in\n {) : ;;\n :) : ;;\nesac\n";
    let mut lexer = Lexer::new(source);

    let expected = [
        (TokenKind::Word, Some("case")),
        (TokenKind::QuotedWord, Some("$word")),
        (TokenKind::Word, Some("in")),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("{")),
        (TokenKind::RightParen, None),
        (TokenKind::Word, Some(":")),
        (TokenKind::DoubleSemicolon, None),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some(":")),
        (TokenKind::RightParen, None),
        (TokenKind::Word, Some(":")),
        (TokenKind::DoubleSemicolon, None),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("esac")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }

    assert!(lexer.next_lexed_token().is_none());
}
5703
/// A regex operand `^{` inside `[[ ... =~ ... ]]` is a plain `Word`; the
/// closing `]]`, `;` and `then` after it are still lexed separately.
#[test]
fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
    let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
    let mut lexer = Lexer::new(source);

    let expected = [
        (TokenKind::Word, Some("if")),
        (TokenKind::DoubleLeftBracket, None),
        (TokenKind::Word, Some("$MOTD")),
        (TokenKind::DoubleRightBracket, None),
        (TokenKind::And, None),
        (TokenKind::Word, Some("!")),
        (TokenKind::DoubleLeftBracket, None),
        (TokenKind::Word, Some("$MOTD")),
        (TokenKind::Word, Some("=~")),
        (TokenKind::Word, Some("^{")),
        (TokenKind::DoubleRightBracket, None),
        (TokenKind::Semicolon, None),
        (TokenKind::Word, Some("then")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }

    assert!(lexer.next_lexed_token().is_none());
}
5725
/// A brace expansion in the middle of a word that contains `$(...)` (with a
/// space inside it) is still lexed as one `Word`.
#[test]
fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
    let source = "echo -{$(echo a),b}-\n";
    let mut lexer = Lexer::new(source);

    let expected = [
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some("-{$(echo a),b}-")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }

    assert!(lexer.next_lexed_token().is_none());
}
5736
/// A brace expansion in the middle of a word that contains `$((...))` (with
/// spaces inside it) is still lexed as one `Word`.
#[test]
fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
    let source = "echo -{$((1 + 2)),b}-\n";
    let mut lexer = Lexer::new(source);

    let expected = [
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some("-{$((1 + 2)),b}-")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }

    assert!(lexer.next_lexed_token().is_none());
}
5747
/// Each control operator (`|&`, `|`, `&&`, `||`, `;`, `&`) lexes to its own
/// token kind between the surrounding words.
#[test]
fn test_operators() {
    let mut lexer = Lexer::new("a |& b | c && d || e; f &");

    let expected = [
        (TokenKind::Word, Some("a")),
        (TokenKind::PipeBoth, None),
        (TokenKind::Word, Some("b")),
        (TokenKind::Pipe, None),
        (TokenKind::Word, Some("c")),
        (TokenKind::And, None),
        (TokenKind::Word, Some("d")),
        (TokenKind::Or, None),
        (TokenKind::Word, Some("e")),
        (TokenKind::Semicolon, None),
        (TokenKind::Word, Some("f")),
        (TokenKind::Background, None),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }

    assert!(lexer.next_lexed_token().is_none());
}
5766
/// `[[` followed by a space is the `DoubleLeftBracket` operator, while
/// `[[z]` with no separator is an ordinary `Word`.
#[test]
fn test_double_left_bracket_requires_separator() {
    let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");

    let expected = [
        (TokenKind::DoubleLeftBracket, None),
        (TokenKind::Word, Some("foo")),
        (TokenKind::DoubleRightBracket, None),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("[[z]")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }

    assert!(lexer.next_lexed_token().is_none());
}
5779
/// Walks a line containing every redirect form covered here and checks the
/// token kind of each operator. The `2>|` token is inspected by hand because
/// it must also report file descriptor 2 and carry no text.
#[test]
fn test_redirects() {
    let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");

    let before_clobber = [
        (TokenKind::Word, Some("a")),
        (TokenKind::RedirectOut, None),
        (TokenKind::Word, Some("b")),
        (TokenKind::RedirectAppend, None),
        (TokenKind::Word, Some("c")),
        (TokenKind::RedirectAppend, None),
        (TokenKind::Word, Some("d")),
        (TokenKind::RedirectFdAppend, None),
        (TokenKind::Word, Some("e")),
    ];
    for (kind, text) in before_clobber {
        assert_next_token(&mut lexer, kind, text);
    }

    let clobber = lexer.next_lexed_token().unwrap();
    assert_eq!(clobber.kind, TokenKind::Clobber);
    assert_eq!(clobber.fd_value(), Some(2));
    assert_eq!(token_text(&clobber, lexer.input), None);

    let after_clobber = [
        (TokenKind::Word, Some("f")),
        (TokenKind::RedirectIn, None),
        (TokenKind::Word, Some("g")),
        (TokenKind::HereDoc, None),
        (TokenKind::Word, Some("h")),
        (TokenKind::HereString, None),
        (TokenKind::Word, Some("i")),
        (TokenKind::RedirectBothAppend, None),
        (TokenKind::Word, Some("j")),
        (TokenKind::RedirectReadWrite, None),
        (TokenKind::Word, Some("k")),
    ];
    for (kind, text) in after_clobber {
        assert_next_token(&mut lexer, kind, text);
    }
}
5809
/// `next_lexed_token` drops the trailing comment: after `hello` the next
/// token is the `Newline`, followed by the second command's words.
#[test]
fn test_comment() {
    let mut lexer = Lexer::new("echo hello # this is a comment\necho world");

    let expected = [
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some("hello")),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some("world")),
    ];
    for (kind, text) in expected {
        assert_next_token(&mut lexer, kind, text);
    }
}
5820
5821 #[test]
5822 fn test_comment_token_with_span() {
5823 let mut lexer = Lexer::new("# lead\necho hi # tail");
5824
5825 let comment = lexer.next_lexed_token_with_comments().unwrap();
5826 assert_eq!(comment.kind, TokenKind::Comment);
5827 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5828 assert_eq!(comment.span.start.line, 1);
5829 assert_eq!(comment.span.start.column, 1);
5830 assert_eq!(comment.span.end.line, 1);
5831 assert_eq!(comment.span.end.column, 7);
5832
5833 assert_next_token(&mut lexer, TokenKind::Newline, None);
5834 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5835 assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5836
5837 let inline = lexer.next_lexed_token_with_comments().unwrap();
5838 assert_eq!(inline.kind, TokenKind::Comment);
5839 assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5840 assert_eq!(inline.span.start.line, 2);
5841 assert_eq!(inline.span.start.column, 9);
5842 }
5843
5844 #[test]
5845 fn test_comment_token_preserves_hash_boundaries() {
5846 let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5847
5848 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5849 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5850 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5851 assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5852 assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5853 assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5854 assert!(lexer.next_lexed_token_with_comments().is_none());
5855 }
5856
5857 #[test]
5858 fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5859 let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5860
5861 let mut saw_comment = false;
5862 while let Some(token) = lexer.next_lexed_token_with_comments() {
5863 if token.kind == TokenKind::Comment {
5864 saw_comment = true;
5865 break;
5866 }
5867 }
5868
5869 assert!(
5870 !saw_comment,
5871 "zsh inline glob controls inside [[ ]] should not lex as comments"
5872 );
5873 }
5874
5875 #[test]
5876 fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5877 let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5878
5879 let mut saw_comment = false;
5880 while let Some(token) = lexer.next_lexed_token_with_comments() {
5881 if token.kind == TokenKind::Comment {
5882 saw_comment = true;
5883 break;
5884 }
5885 }
5886
5887 assert!(
5888 !saw_comment,
5889 "zsh arithmetic char literals inside (( )) should not lex as comments"
5890 );
5891 }
5892
5893 #[test]
5894 fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
5895 let mut lexer = Lexer::new(
5896 "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
5897 );
5898
5899 assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5900 assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5901 assert_next_token(
5902 &mut lexer,
5903 TokenKind::LiteralWord,
5904 Some("\\e]133;C;cmdline_url=%s\\a"),
5905 );
5906 assert_next_token(
5907 &mut lexer,
5908 TokenKind::QuotedWord,
5909 Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5910 );
5911 assert_next_token(&mut lexer, TokenKind::Newline, None);
5912 }
5913
5914 #[test]
5915 fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
5916 let mut lexer = Lexer::new(
5917 "() {\n builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
5918 );
5919
5920 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5921 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5922 assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5923 assert_next_token(&mut lexer, TokenKind::Newline, None);
5924 assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5925 assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5926 assert_next_token(
5927 &mut lexer,
5928 TokenKind::LiteralWord,
5929 Some("\\e]133;C;cmdline_url=%s\\a"),
5930 );
5931 assert_next_token(
5932 &mut lexer,
5933 TokenKind::QuotedWord,
5934 Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5935 );
5936 assert_next_token(&mut lexer, TokenKind::Newline, None);
5937 assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5938 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
5939 assert_next_token(&mut lexer, TokenKind::Newline, None);
5940 }
5941
5942 #[test]
5943 fn test_variable_words() {
5944 let mut lexer = Lexer::new("echo $HOME $USER");
5945
5946 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5947 assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5948 assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5949 assert!(lexer.next_lexed_token().is_none());
5950 }
5951
5952 #[test]
5953 fn test_pipeline_tokens() {
5954 let mut lexer = Lexer::new("echo hello | cat");
5955
5956 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5957 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5958 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5959 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5960 assert!(lexer.next_lexed_token().is_none());
5961 }
5962
5963 #[test]
5964 fn test_read_heredoc() {
5965 let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5967 let content = lexer.read_heredoc("EOF", false);
5968 assert_eq!(content.content, "hello\nworld\n");
5969 }
5970
5971 #[test]
5972 fn test_read_heredoc_single_line() {
5973 let mut lexer = Lexer::new("\ntest\nEOF");
5974 let content = lexer.read_heredoc("EOF", false);
5975 assert_eq!(content.content, "test\n");
5976 }
5977
5978 #[test]
5979 fn test_read_heredoc_full_scenario() {
5980 let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5982
5983 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5985 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5986 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5987
5988 let content = lexer.read_heredoc("EOF", false);
5990 assert_eq!(content.content, "hello\nworld\n");
5991 }
5992
5993 #[test]
5994 fn test_read_heredoc_with_redirect() {
5995 let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5997 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5998 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5999 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6000 let content = lexer.read_heredoc("EOF", false);
6001 assert_eq!(content.content, "hello\n");
6002 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6004 assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
6005 }
6006
6007 #[test]
6008 fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
6009 let source = "cat <<EOF | grep hello \\\n | sort \\\n > out.txt\nhello\nEOF\n";
6010 let mut lexer = Lexer::new(source);
6011
6012 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6013 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6014 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6015
6016 let heredoc = lexer.read_heredoc("EOF", false);
6017 assert_eq!(heredoc.content, "hello\n");
6018
6019 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6020 assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
6021 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
6022 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6023 assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
6024 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6025 assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
6026 }
6027
6028 #[test]
6029 fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
6030 let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
6031 let mut lexer = Lexer::new(source);
6032
6033 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6034 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6035 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6036
6037 let heredoc = lexer.read_heredoc("EOF", false);
6038 assert_eq!(heredoc.content, "1\n2\n3\n");
6039 }
6040
6041 #[test]
6042 fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
6043 let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
6044 let mut lexer = Lexer::new(source);
6045
6046 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6047 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6048 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6049
6050 let heredoc = lexer.read_heredoc("EOF", false);
6051 assert_eq!(heredoc.content, "body\n");
6052 }
6053
6054 #[test]
6055 fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
6056 let source = "cat <<EOF # note \\\nbody\nEOF\n";
6057 let mut lexer = Lexer::new(source);
6058
6059 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6060 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6061 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6062
6063 let heredoc = lexer.read_heredoc("EOF", false);
6064 assert_eq!(heredoc.content, "body\n");
6065 }
6066
6067 #[test]
6068 fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
6069 let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
6070 let mut lexer = Lexer::new(source);
6071
6072 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6073 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6074 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6075 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6076
6077 let heredoc = lexer.read_heredoc("EOF", false);
6078 assert_eq!(heredoc.content, "body\n");
6079
6080 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6081 }
6082
6083 #[test]
6084 fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
6085 let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
6086 let mut lexer = Lexer::new(source);
6087
6088 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6089 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6090 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6091
6092 let heredoc = lexer.read_heredoc("EOF", false);
6093 assert_eq!(heredoc.content, "1\n");
6094
6095 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6096 assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
6097 }
6098
6099 #[test]
6100 fn test_read_heredoc_with_redirect_preserves_following_spans() {
6101 let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
6102 let mut lexer = Lexer::new(source);
6103
6104 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6105 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6106 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6107
6108 let heredoc = lexer.read_heredoc("EOF", false);
6109 assert_eq!(heredoc.content, "hello\n");
6110
6111 let redirect = lexer.next_lexed_token_with_comments().unwrap();
6112 assert_eq!(redirect.kind, TokenKind::RedirectOut);
6113 assert_eq!(redirect.span.slice(source), ">");
6114
6115 let target = lexer.next_lexed_token_with_comments().unwrap();
6116 assert_eq!(target.kind, TokenKind::Word);
6117 assert_eq!(
6118 token_text(&target, lexer.input).as_deref(),
6119 Some("file.txt")
6120 );
6121 assert_eq!(target.span.slice(source), "file.txt");
6122
6123 let newline = lexer.next_lexed_token_with_comments().unwrap();
6124 assert_eq!(newline.kind, TokenKind::Newline);
6125 assert_eq!(newline.span.slice(source), "\n");
6126
6127 let comment = lexer.next_lexed_token_with_comments().unwrap();
6128 assert_eq!(comment.kind, TokenKind::Comment);
6129 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
6130 assert_eq!(comment.span.slice(source), "# done");
6131 }
6132
6133 #[test]
6134 fn test_comment_with_unicode() {
6135 let source = "# café résumé\necho ok";
6137 let mut lexer = Lexer::new(source);
6138
6139 let comment = lexer.next_lexed_token_with_comments().unwrap();
6140 assert_eq!(comment.kind, TokenKind::Comment);
6141 assert_eq!(
6142 token_text(&comment, lexer.input).as_deref(),
6143 Some(" café résumé")
6144 );
6145 let start = comment.span.start.offset;
6147 let end = comment.span.end.offset;
6148 assert_eq!(start, 0);
6149 assert_eq!(&source[start..end], "# café résumé");
6150 assert!(source.is_char_boundary(start));
6151 assert!(source.is_char_boundary(end));
6152
6153 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6154 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
6155 }
6156
6157 #[test]
6158 fn test_comment_with_cjk_characters() {
6159 let source = "# 你好世界\necho ok";
6161 let mut lexer = Lexer::new(source);
6162
6163 let comment = lexer.next_lexed_token_with_comments().unwrap();
6164 assert_eq!(comment.kind, TokenKind::Comment);
6165 assert_eq!(
6166 token_text(&comment, lexer.input).as_deref(),
6167 Some(" 你好世界")
6168 );
6169 let start = comment.span.start.offset;
6170 let end = comment.span.end.offset;
6171 assert_eq!(&source[start..end], "# 你好世界");
6172 assert!(source.is_char_boundary(start));
6173 assert!(source.is_char_boundary(end));
6174 }
6175
6176 #[test]
6177 fn test_heredoc_with_comments_inside() {
6178 let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6180 let mut lexer = Lexer::new(source);
6181
6182 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6183 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6184 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6185
6186 let heredoc = lexer.read_heredoc("EOF", false);
6187 assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6188
6189 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6192 let comment = lexer.next_lexed_token_with_comments().unwrap();
6193 assert_eq!(comment.kind, TokenKind::Comment);
6194 assert_eq!(
6195 token_text(&comment, lexer.input).as_deref(),
6196 Some(" real comment")
6197 );
6198 }
6199
6200 #[test]
6201 fn test_heredoc_with_hash_in_variable() {
6202 let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6204 let mut lexer = Lexer::new(source);
6205
6206 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6207 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6208 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6209
6210 let heredoc = lexer.read_heredoc("EOF", false);
6211 assert_eq!(heredoc.content, "val=${x#prefix}\n");
6212 }
6213
6214 #[test]
6215 fn test_heredoc_span_does_not_leak() {
6216 let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6219 let mut lexer = Lexer::new(source);
6220
6221 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6222 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6223 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6224
6225 let heredoc = lexer.read_heredoc("EOF", false);
6226 let start = heredoc.content_span.start.offset;
6227 let end = heredoc.content_span.end.offset;
6228 assert!(
6229 end <= source.len(),
6230 "heredoc span end ({end}) exceeds source length ({})",
6231 source.len()
6232 );
6233 assert_eq!(&source[start..end], "hello\nworld\n");
6234
6235 assert_next_token(&mut lexer, TokenKind::Newline, None);
6237 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6238 assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6239 }
6240
6241 #[test]
6242 fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
6243 let source = "\
6244cat <<\\_ACEOF
6245Use these variables to override the choices made by `configure' or to help
6246it to find libraries and programs with nonstandard names/locations.
6247_ACEOF
6248ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
6249ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
6250";
6251 let mut lexer = Lexer::new(source);
6252
6253 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6254 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6255 let delimiter = lexer.next_lexed_token_with_comments().unwrap();
6256 assert_eq!(delimiter.kind, TokenKind::Word);
6257 assert_eq!(delimiter.span.slice(source), "\\_ACEOF");
6258
6259 let heredoc = lexer.read_heredoc("_ACEOF", false);
6260 assert_eq!(
6261 heredoc.content,
6262 "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
6263 );
6264
6265 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6266
6267 let first = lexer.next_lexed_token_with_comments().unwrap();
6268 assert_eq!(first.kind, TokenKind::Word);
6269 assert_eq!(
6270 first.span.slice(source),
6271 "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
6272 );
6273 let first_segments = first
6274 .word()
6275 .unwrap()
6276 .segments()
6277 .map(|segment| {
6278 (
6279 segment.kind(),
6280 segment.as_str().to_string(),
6281 segment.span().map(|span| span.slice(source).to_string()),
6282 )
6283 })
6284 .collect::<Vec<_>>();
6285 assert_eq!(
6286 first_segments,
6287 vec![
6288 (
6289 LexedWordSegmentKind::Plain,
6290 "ac_dir_suffix=/".to_string(),
6291 Some("ac_dir_suffix=/".to_string()),
6292 ),
6293 (
6294 LexedWordSegmentKind::Plain,
6295 "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
6296 Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
6297 ),
6298 ]
6299 );
6300
6301 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6302
6303 let second = lexer.next_lexed_token_with_comments().unwrap();
6304 assert_eq!(second.kind, TokenKind::Word);
6305 assert_eq!(
6306 second.span.slice(source),
6307 "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6308 );
6309 let second_segments = second
6310 .word()
6311 .unwrap()
6312 .segments()
6313 .map(|segment| {
6314 (
6315 segment.kind(),
6316 segment.as_str().to_string(),
6317 segment.span().map(|span| span.slice(source).to_string()),
6318 )
6319 })
6320 .collect::<Vec<_>>();
6321 assert_eq!(
6322 second_segments,
6323 vec![
6324 (
6325 LexedWordSegmentKind::Plain,
6326 "ac_top_builddir_sub=".to_string(),
6327 Some("ac_top_builddir_sub=".to_string()),
6328 ),
6329 (
6330 LexedWordSegmentKind::Plain,
6331 "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
6332 Some(
6333 "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6334 .to_string(),
6335 ),
6336 ),
6337 ]
6338 );
6339 }
6340
6341 #[test]
6342 fn test_heredoc_with_unicode_content() {
6343 let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6345 let mut lexer = Lexer::new(source);
6346
6347 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6348 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6349 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6350
6351 let heredoc = lexer.read_heredoc("EOF", false);
6352 assert_eq!(heredoc.content, "# 你好\ncafé\n");
6353 let start = heredoc.content_span.start.offset;
6354 let end = heredoc.content_span.end.offset;
6355 assert!(
6356 source.is_char_boundary(start),
6357 "heredoc span start ({start}) not on char boundary"
6358 );
6359 assert!(
6360 source.is_char_boundary(end),
6361 "heredoc span end ({end}) not on char boundary"
6362 );
6363 assert_eq!(&source[start..end], "# 你好\ncafé\n");
6364 }
6365
6366 #[test]
6367 fn test_assoc_compound_assignment() {
6368 let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6371 assert_next_token(
6372 &mut lexer,
6373 TokenKind::Word,
6374 Some(r#"m=([foo]="bar" [baz]="qux")"#),
6375 );
6376 assert!(lexer.next_lexed_token().is_none());
6377 }
6378
6379 #[test]
6380 fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6381 let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6382 let mut lexer = Lexer::new(source);
6383
6384 let token = lexer.next_lexed_token().unwrap();
6385 assert_eq!(token.kind, TokenKind::Word);
6386 assert_eq!(token.span.slice(source), source);
6387 assert!(lexer.next_lexed_token().is_none());
6388 }
6389
6390 #[test]
6391 fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6392 let source = r#"foo\_bar@(baz|qux)"#;
6393 let mut lexer = Lexer::new(source);
6394
6395 let token = lexer.next_lexed_token().unwrap();
6396 assert_eq!(token.kind, TokenKind::Word);
6397 assert_eq!(token.span.slice(source), source);
6398 assert!(lexer.next_lexed_token().is_none());
6399 }
6400
6401 #[test]
6402 fn test_indexed_array_not_collapsed() {
6403 let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6406 assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6407 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6408 }
6409
6410 #[test]
6411 fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6412 let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6413 let mut lexer = Lexer::new(source);
6414
6415 assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6416 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6417
6418 let token = lexer.next_lexed_token().unwrap();
6419 assert_eq!(token.kind, TokenKind::Word);
6420 assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6421
6422 let word = token.word().unwrap();
6423 let segments: Vec<_> = word
6424 .segments()
6425 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6426 .collect();
6427 assert_eq!(
6428 segments,
6429 vec![
6430 (
6431 LexedWordSegmentKind::DoubleQuoted,
6432 "$plugin_dir".to_string()
6433 ),
6434 (LexedWordSegmentKind::Plain, "/*".to_string()),
6435 (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6436 ]
6437 );
6438
6439 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6440 assert!(lexer.next_lexed_token().is_none());
6441 }
6442
6443 #[test]
6444 fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6445 let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6446 let mut lexer = Lexer::new(source);
6447
6448 assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6449 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6450
6451 let token = lexer.next_lexed_token().unwrap();
6452 assert_eq!(token.kind, TokenKind::Word);
6453 assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6454
6455 let word = token.word().unwrap();
6456 let segments: Vec<_> = word
6457 .segments()
6458 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6459 .collect();
6460 assert_eq!(
6461 segments,
6462 vec![
6463 (
6464 LexedWordSegmentKind::DoubleQuoted,
6465 "$__GREP_CACHE_FILE".to_string()
6466 ),
6467 (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6468 ]
6469 );
6470
6471 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6472 assert!(lexer.next_lexed_token().is_none());
6473 }
6474
6475 #[test]
6476 fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6477 let source = r#"$dir/${~pats}(N)"#;
6478 let mut lexer = Lexer::new(source);
6479
6480 let token = lexer.next_lexed_token().unwrap();
6481 assert_eq!(token.kind, TokenKind::Word);
6482 assert_eq!(token.span.slice(source), source);
6483 assert!(lexer.next_lexed_token().is_none());
6484 }
6485
6486 #[test]
6487 fn test_dollar_word_does_not_absorb_function_parens() {
6488 let mut lexer = Lexer::new(r#"foo$x()"#);
6489
6490 assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6491 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6492 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6493 assert!(lexer.next_lexed_token().is_none());
6494 }
6495
6496 #[test]
6497 fn test_command_substitution_word_does_not_absorb_function_parens() {
6498 let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6499
6500 assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6501 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6502 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6503 assert!(lexer.next_lexed_token().is_none());
6504 }
6505
6506 #[test]
6509 fn test_digit_at_eof_no_panic() {
6510 let mut lexer = Lexer::new("2");
6512 let token = lexer.next_lexed_token();
6513 assert!(token.is_some());
6514 }
6515
6516 #[test]
6518 fn test_nested_brace_expansion_single_token() {
6519 let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6521 assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6522 assert!(lexer.next_lexed_token().is_none());
6524 }
6525
6526 #[test]
6528 fn test_simple_brace_expansion_unchanged() {
6529 let mut lexer = Lexer::new("${foo}");
6530 assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6531 assert!(lexer.next_lexed_token().is_none());
6532 }
6533
6534 #[test]
6535 fn test_nvm_fixture_lexes_without_stalling() {
6536 let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6537 let mut lexer = Lexer::new(input);
6538 let mut tokens = 0usize;
6539
6540 while lexer.next_lexed_token().is_some() {
6541 tokens += 1;
6542 assert!(
6543 tokens < 100_000,
6544 "lexer should continue making progress on the nvm fixture"
6545 );
6546 }
6547
6548 assert!(tokens > 0, "nvm fixture should produce at least one token");
6549 }
6550
6551 #[test]
6552 fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6553 let input = concat!(
6554 "case \"${_input_type:-}\" in\n",
6555 " html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6556 " org) _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6557 "esac\n",
6558 );
6559
6560 assert_non_newline_tokens_stay_on_one_line(input);
6561
6562 let mut lexer = Lexer::new(input);
6563 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6564 .map(|token| (token.kind, token_text(&token, input)))
6565 .collect::<Vec<_>>();
6566 assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6567 assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6568 }
6569
6570 #[test]
6571 fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6572 let input = concat!(
6573 "case $2 in\n",
6574 " cygwin*) bin='cygwin32/bin' ;|\n",
6575 "esac\n",
6576 );
6577
6578 let mut lexer = Lexer::new(input);
6579 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6580 .map(|token| (token.kind, token_text(&token, input)))
6581 .collect::<Vec<_>>();
6582
6583 assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6584 assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6585 assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6586 }
6587
6588 #[test]
6589 fn test_inline_if_with_array_append_stays_line_local() {
6590 let input = concat!(
6591 "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6592 "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6593 );
6594
6595 assert_non_newline_tokens_stay_on_one_line(input);
6596 }
6597
6598 #[test]
6599 fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6600 let source = "unsetopt interactive_comments\n#literal\n";
6601 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6602 let mut lexer = Lexer::with_profile(source, &profile);
6603
6604 assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6605 assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6606 assert_next_token(&mut lexer, TokenKind::Newline, None);
6607 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6608 }
6609
6610 #[test]
6611 fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6612 let source = "setopt rc_quotes\nprint 'a''b'\n";
6613 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6614 let mut lexer = Lexer::with_profile(source, &profile);
6615
6616 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6617 assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6618 assert_next_token(&mut lexer, TokenKind::Newline, None);
6619 assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6620 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6621 }
6622
6623 #[test]
6624 fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6625 let source = "setopt ignore_braces\n{ echo }\n";
6626 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6627 let mut lexer = Lexer::with_profile(source, &profile);
6628
6629 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6630 assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6631 assert_next_token(&mut lexer, TokenKind::Newline, None);
6632 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6633 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6634 assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6635 }
6636
    /// Regression test driven by a fixed byte sequence (going by the test name,
    /// presumably a fuzzer finding; confirm against the original report).
    ///
    /// The bytes are decoded as UTF-8, wrapped as `echo $((<bytes>))` and handed
    /// to the parser. The parse result is discarded, so the test fails only if
    /// decoding fails or parsing panics.
    #[test]
    fn test_heredoc_in_arithmetic_fuzz_crash() {
        // Raw input bytes. They include control bytes, NULs and several `<<`
        // sequences (60, 60) followed by `E` or `*`.
        let data: &[u8] = &[
            35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
            40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
            33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
            119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
            0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
            119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
            122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
            122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
            49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
            119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
            122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
            61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
            32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
            122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
            39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
            122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
            32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
            0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
        ];
        // The fixture is expected to be valid UTF-8; `unwrap` fails the test otherwise.
        let input = std::str::from_utf8(data).unwrap();
        // Embed the bytes inside an arithmetic expansion.
        let script = format!("echo $(({input}))\n");
        // Only "does not panic" is being checked, so the result is discarded.
        let _ = crate::parser::Parser::new(&script).parse();
    }
6668}