1use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact set of boolean markers attached to a lexed token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit raised when the token carries cooked text.
    const COOKED_TEXT: u8 = 1 << 0;
    /// Bit raised when the token has been marked synthetic.
    const SYNTHETIC: u8 = 1 << 1;

    /// Returns a flag set with every marker cleared.
    const fn empty() -> Self {
        Self(0)
    }

    /// Returns a flag set with only the cooked-text marker raised.
    const fn cooked_text() -> Self {
        Self::empty().with(Self::COOKED_TEXT)
    }

    /// Returns a copy of `self` with the synthetic marker raised.
    pub(crate) const fn with_synthetic(self) -> Self {
        self.with(Self::SYNTHETIC)
    }

    /// Reports whether the cooked-text marker is raised.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.contains(Self::COOKED_TEXT)
    }

    /// Reports whether the synthetic marker is raised.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.contains(Self::SYNTHETIC)
    }

    /// Returns a copy of `self` with every bit of `mask` raised.
    const fn with(self, mask: u8) -> Self {
        Self(self.0 | mask)
    }

    /// Reports whether any bit of `mask` is raised.
    const fn contains(self, mask: u8) -> bool {
        (self.0 & mask) != 0
    }
}
40
41#[derive(Debug, Clone, PartialEq, Eq)]
42pub(crate) enum TokenText<'a> {
43 Borrowed(&'a str),
44 Shared {
45 source: Arc<str>,
46 range: Range<usize>,
47 },
48 Owned(String),
49}
50
51impl TokenText<'_> {
52 pub(crate) fn as_str(&self) -> &str {
53 match self {
54 Self::Borrowed(text) => text,
55 Self::Shared { source, range } => &source[range.clone()],
56 Self::Owned(text) => text,
57 }
58 }
59
60 fn into_owned<'a>(self) -> TokenText<'a> {
61 match self {
62 Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63 Self::Shared { source, range } => TokenText::Shared { source, range },
64 Self::Owned(text) => TokenText::Owned(text),
65 }
66 }
67
68 fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69 match self {
70 Self::Borrowed(text) => span
71 .filter(|span| span.end.offset <= source.len())
72 .map_or_else(
73 || TokenText::Owned(text.to_string()),
74 |span| TokenText::Shared {
75 source: Arc::clone(source),
76 range: span.start.offset..span.end.offset,
77 },
78 ),
79 Self::Shared { source, range } => TokenText::Shared { source, range },
80 Self::Owned(text) => TokenText::Owned(text),
81 }
82 }
83}
84
/// Classification of one segment of a lexed word.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexedWordSegmentKind {
    /// Kind assigned to segments of `TokenKind::Word` tokens.
    Plain,
    /// Kind assigned to segments of `TokenKind::LiteralWord` tokens.
    SingleQuoted,
    /// Presumably a `$'...'` segment; its producer is outside this part of the file.
    DollarSingleQuoted,
    /// Kind assigned to segments of `TokenKind::QuotedWord` tokens.
    DoubleQuoted,
    /// Presumably a `$"..."` segment; its producer is outside this part of the file.
    DollarDoubleQuoted,
    /// Fallback kind used for every other token kind.
    Composite,
}
101
102#[derive(Debug, Clone, PartialEq, Eq)]
104pub(crate) struct LexedWordSegment<'a> {
105 kind: LexedWordSegmentKind,
106 text: TokenText<'a>,
107 span: Option<Span>,
108 wrapper_span: Option<Span>,
109}
110
111impl<'a> LexedWordSegment<'a> {
112 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113 Self {
114 kind,
115 text: TokenText::Borrowed(text),
116 span,
117 wrapper_span: span,
118 }
119 }
120
121 fn borrowed_with_spans(
122 kind: LexedWordSegmentKind,
123 text: &'a str,
124 span: Option<Span>,
125 wrapper_span: Option<Span>,
126 ) -> Self {
127 Self {
128 kind,
129 text: TokenText::Borrowed(text),
130 span,
131 wrapper_span,
132 }
133 }
134
135 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136 Self {
137 kind,
138 text: TokenText::Owned(text),
139 span: None,
140 wrapper_span: None,
141 }
142 }
143
144 fn owned_with_spans(
145 kind: LexedWordSegmentKind,
146 text: String,
147 span: Option<Span>,
148 wrapper_span: Option<Span>,
149 ) -> Self {
150 Self {
151 kind,
152 text: TokenText::Owned(text),
153 span,
154 wrapper_span,
155 }
156 }
157
158 pub(crate) fn as_str(&self) -> &str {
160 self.text.as_str()
161 }
162
163 pub(crate) const fn text_is_source_backed(&self) -> bool {
164 matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165 }
166
167 pub(crate) const fn kind(&self) -> LexedWordSegmentKind {
169 self.kind
170 }
171
172 pub(crate) const fn span(&self) -> Option<Span> {
174 self.span
175 }
176
177 pub(crate) fn wrapper_span(&self) -> Option<Span> {
179 self.wrapper_span.or(self.span)
180 }
181
182 fn rebased(mut self, base: Position) -> Self {
183 self.span = self.span.map(|span| span.rebased(base));
184 self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185 self
186 }
187
188 fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189 LexedWordSegment {
190 kind: self.kind,
191 text: self.text.into_owned(),
192 span: self.span,
193 wrapper_span: self.wrapper_span,
194 }
195 }
196
197 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198 LexedWordSegment {
199 kind: self.kind,
200 text: self.text.into_shared(source, self.span),
201 span: self.span,
202 wrapper_span: self.wrapper_span,
203 }
204 }
205}
206
207#[derive(Debug, Clone, PartialEq, Eq)]
209pub(crate) struct LexedWord<'a> {
210 primary_segment: LexedWordSegment<'a>,
211 trailing_segments: Vec<LexedWordSegment<'a>>,
212}
213
214impl<'a> LexedWord<'a> {
215 fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216 Self {
217 primary_segment,
218 trailing_segments: Vec::new(),
219 }
220 }
221
222 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223 Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224 }
225
226 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227 Self::from_segment(LexedWordSegment::owned(kind, text))
228 }
229
230 fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231 self.trailing_segments.push(segment);
232 }
233
234 pub(crate) fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236 std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237 }
238
239 pub(crate) fn text(&self) -> Option<&str> {
241 self.single_segment().map(LexedWordSegment::as_str)
242 }
243
244 pub(crate) fn joined_text(&self) -> String {
246 let mut text = String::new();
247 for segment in self.segments() {
248 text.push_str(segment.as_str());
249 }
250 text
251 }
252
253 pub(crate) fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255 self.trailing_segments
256 .is_empty()
257 .then_some(&self.primary_segment)
258 }
259
260 fn has_cooked_text(&self) -> bool {
261 self.segments()
262 .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263 }
264
265 fn rebased(mut self, base: Position) -> Self {
266 self.primary_segment = self.primary_segment.rebased(base);
267 self.trailing_segments = self
268 .trailing_segments
269 .into_iter()
270 .map(|segment| segment.rebased(base))
271 .collect();
272 self
273 }
274
275 fn into_owned<'b>(self) -> LexedWord<'b> {
276 LexedWord {
277 primary_segment: self.primary_segment.into_owned(),
278 trailing_segments: self
279 .trailing_segments
280 .into_iter()
281 .map(LexedWordSegment::into_owned)
282 .collect(),
283 }
284 }
285
286 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287 LexedWord {
288 primary_segment: self.primary_segment.into_shared(source),
289 trailing_segments: self
290 .trailing_segments
291 .into_iter()
292 .map(|segment| segment.into_shared(source))
293 .collect(),
294 }
295 }
296}
297
/// Kinds of unterminated construct the lexer reports through an error token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexerErrorKind {
    /// Reported as "unterminated command substitution".
    CommandSubstitution,
    /// Reported as "unterminated backtick substitution".
    BacktickSubstitution,
    /// Reported as "unterminated single quote".
    SingleQuote,
    /// Reported as "unterminated double quote".
    DoubleQuote,
}

impl LexerErrorKind {
    /// Returns the fixed human-readable message for this error kind.
    pub(crate) const fn message(self) -> &'static str {
        match self {
            Self::SingleQuote => "unterminated single quote",
            Self::DoubleQuote => "unterminated double quote",
            Self::BacktickSubstitution => "unterminated backtick substitution",
            Self::CommandSubstitution => "unterminated command substitution",
        }
    }
}
322
323#[derive(Debug, Clone, PartialEq, Eq)]
324pub(crate) enum TokenPayload<'a> {
325 None,
326 Word(LexedWord<'a>),
327 Fd(i32),
328 FdPair(i32, i32),
329 Error(LexerErrorKind),
330}
331
332#[derive(Debug, Clone, PartialEq, Eq)]
338pub struct LexedToken<'a> {
339 pub kind: TokenKind,
341 pub span: Span,
343 pub(crate) flags: TokenFlags,
344 payload: TokenPayload<'a>,
345}
346
347impl<'a> LexedToken<'a> {
348 fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
349 match kind {
350 TokenKind::Word => LexedWordSegmentKind::Plain,
351 TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
352 TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
353 _ => LexedWordSegmentKind::Composite,
354 }
355 }
356
357 pub(crate) fn punctuation(kind: TokenKind) -> Self {
358 Self {
359 kind,
360 span: Span::new(),
361 flags: TokenFlags::empty(),
362 payload: TokenPayload::None,
363 }
364 }
365
366 fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
367 let flags = if word.has_cooked_text() {
368 TokenFlags::cooked_text()
369 } else {
370 TokenFlags::empty()
371 };
372
373 Self {
374 kind,
375 span: Span::new(),
376 flags,
377 payload: TokenPayload::Word(word),
378 }
379 }
380
381 fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
382 Self::with_word_payload(
383 kind,
384 LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
385 )
386 }
387
388 fn owned_word(kind: TokenKind, text: String) -> Self {
389 Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
390 }
391
392 fn comment() -> Self {
393 Self {
394 kind: TokenKind::Comment,
395 span: Span::new(),
396 flags: TokenFlags::empty(),
397 payload: TokenPayload::None,
398 }
399 }
400
401 fn fd(kind: TokenKind, fd: i32) -> Self {
402 Self {
403 kind,
404 span: Span::new(),
405 flags: TokenFlags::empty(),
406 payload: TokenPayload::Fd(fd),
407 }
408 }
409
410 fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
411 Self {
412 kind,
413 span: Span::new(),
414 flags: TokenFlags::empty(),
415 payload: TokenPayload::FdPair(src_fd, dst_fd),
416 }
417 }
418
419 fn error(kind: LexerErrorKind) -> Self {
420 Self {
421 kind: TokenKind::Error,
422 span: Span::new(),
423 flags: TokenFlags::empty(),
424 payload: TokenPayload::Error(kind),
425 }
426 }
427
428 pub(crate) fn with_span(mut self, span: Span) -> Self {
429 self.span = span;
430 self
431 }
432
433 pub(crate) fn rebased(mut self, base: Position) -> Self {
434 self.span = self.span.rebased(base);
435 self.payload = match self.payload {
436 TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
437 payload => payload,
438 };
439 self
440 }
441
442 pub(crate) fn with_synthetic_flag(mut self) -> Self {
443 self.flags = self.flags.with_synthetic();
444 self
445 }
446
447 pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
448 let payload = match self.payload {
449 TokenPayload::None => TokenPayload::None,
450 TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
451 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
452 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
453 TokenPayload::Error(kind) => TokenPayload::Error(kind),
454 };
455
456 LexedToken {
457 kind: self.kind,
458 span: self.span,
459 flags: self.flags,
460 payload,
461 }
462 }
463
464 pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
465 let payload = match self.payload {
466 TokenPayload::None => TokenPayload::None,
467 TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
468 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
469 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
470 TokenPayload::Error(kind) => TokenPayload::Error(kind),
471 };
472
473 LexedToken {
474 kind: self.kind,
475 span: self.span,
476 flags: self.flags,
477 payload,
478 }
479 }
480
481 pub(crate) fn word_text(&self) -> Option<&str> {
483 self.kind
484 .is_word_like()
485 .then_some(())
486 .and_then(|_| match &self.payload {
487 TokenPayload::Word(word) => word.text(),
488 _ => None,
489 })
490 }
491
492 pub(crate) fn word_string(&self) -> Option<String> {
494 self.kind
495 .is_word_like()
496 .then_some(())
497 .and_then(|_| match &self.payload {
498 TokenPayload::Word(word) => Some(word.joined_text()),
499 _ => None,
500 })
501 }
502
503 pub(crate) fn word(&self) -> Option<&LexedWord<'a>> {
505 match &self.payload {
506 TokenPayload::Word(word) => Some(word),
507 _ => None,
508 }
509 }
510
511 pub(crate) fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
513 if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
514 return None;
515 }
516
517 (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
518 .then(|| &source[self.span.start.offset..self.span.end.offset])
519 }
520
521 pub(crate) fn fd_value(&self) -> Option<i32> {
523 match self.payload {
524 TokenPayload::Fd(fd) => Some(fd),
525 _ => None,
526 }
527 }
528
529 pub(crate) fn fd_pair_value(&self) -> Option<(i32, i32)> {
531 match self.payload {
532 TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
533 _ => None,
534 }
535 }
536
537 pub(crate) fn error_kind(&self) -> Option<LexerErrorKind> {
539 match self.payload {
540 TokenPayload::Error(kind) => Some(kind),
541 _ => None,
542 }
543 }
544}
545
546#[derive(Debug, Clone, PartialEq)]
548pub(crate) struct HeredocRead {
549 pub content: String,
551 pub content_span: Span,
553}
554
/// Substitution depth limit that `Lexer::new` passes as `max_depth`.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
/// Depth limit for parameter-expansion scanning.
/// NOTE(review): not used in this part of the file; confirm at the use site.
const MAX_PARAMETER_EXPANSION_SCAN_DEPTH: usize = 4;
559
560#[derive(Clone, Debug)]
561struct Cursor<'a> {
562 rest: &'a str,
563}
564
565impl<'a> Cursor<'a> {
566 fn new(source: &'a str) -> Self {
567 Self { rest: source }
568 }
569
570 fn first(&self) -> Option<char> {
571 self.rest.chars().next()
572 }
573
574 fn second(&self) -> Option<char> {
575 let mut chars = self.rest.chars();
576 chars.next()?;
577 chars.next()
578 }
579
580 fn third(&self) -> Option<char> {
581 let mut chars = self.rest.chars();
582 chars.next()?;
583 chars.next()?;
584 chars.next()
585 }
586
587 fn bump(&mut self) -> Option<char> {
588 let ch = self.first()?;
589 self.rest = &self.rest[ch.len_utf8()..];
590 Some(ch)
591 }
592
593 fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
594 let start = self.rest;
595 let mut end = 0;
596
597 for ch in start.chars() {
598 if !predicate(ch) {
599 break;
600 }
601 end += ch.len_utf8();
602 }
603
604 self.rest = &start[end..];
605 &start[..end]
606 }
607
608 fn rest(&self) -> &'a str {
609 self.rest
610 }
611
612 fn skip_bytes(&mut self, count: usize) {
613 self.rest = &self.rest[count..];
614 }
615
616 fn find_byte(&self, byte: u8) -> Option<usize> {
617 memchr(byte, self.rest.as_bytes())
618 }
619}
620
621#[derive(Clone, Debug)]
622struct PositionMap<'a> {
623 source: &'a str,
624 line_starts: Arc<[usize]>,
625 cached: Position,
626}
627
628#[cfg(feature = "benchmarking")]
629#[derive(Clone, Copy, Debug, Default)]
630pub(crate) struct LexerBenchmarkCounters {
631 pub(crate) current_position_calls: u64,
632}
633
634impl<'a> PositionMap<'a> {
635 fn new(source: &'a str) -> Self {
636 let mut line_starts =
637 Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
638 line_starts.push(0);
639 line_starts.extend(
640 source
641 .bytes()
642 .enumerate()
643 .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
644 );
645
646 Self {
647 source,
648 line_starts: line_starts.into(),
649 cached: Position::new(),
650 }
651 }
652
653 fn position(&mut self, offset: usize) -> Position {
654 if offset == self.cached.offset {
655 return self.cached;
656 }
657
658 let position = if offset > self.cached.offset && offset <= self.source.len() {
659 Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
660 } else {
661 self.position_uncached(offset)
662 };
663 self.cached = position;
664 position
665 }
666
667 fn position_uncached(&self, offset: usize) -> Position {
668 let offset = offset.min(self.source.len());
669 let line_index = self
670 .line_starts
671 .partition_point(|start| *start <= offset)
672 .saturating_sub(1);
673 let line_start = self.line_starts[line_index];
674 let line_text = &self.source[line_start..offset];
675 let column = if line_text.is_ascii() {
676 line_text.len() + 1
677 } else {
678 line_text.chars().count() + 1
679 };
680
681 Position {
682 line: line_index + 1,
683 column,
684 offset,
685 }
686 }
687
688 fn advance_from(mut position: Position, text: &str) -> Position {
689 position.offset += text.len();
690 let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
691 if newline_count == 0 {
692 position.column += if text.is_ascii() {
693 text.len()
694 } else {
695 text.chars().count()
696 };
697 return position;
698 }
699
700 position.line += newline_count;
701 let tail_start = memrchr(b'\n', text.as_bytes())
702 .map(|index| index + 1)
703 .unwrap_or_default();
704 let tail = &text[tail_start..];
705 position.column = if tail.is_ascii() {
706 tail.len() + 1
707 } else {
708 tail.chars().count() + 1
709 };
710 position
711 }
712}
713
714#[derive(Clone)]
720pub struct Lexer<'a> {
721 #[allow(dead_code)] input: &'a str,
723 offset: usize,
725 cursor: Cursor<'a>,
726 position_map: PositionMap<'a>,
727 reinject_buf: VecDeque<char>,
730 reinject_resume_offset: Option<usize>,
732 max_subst_depth: usize,
734 initial_zsh_options: Option<ZshOptionState>,
735 zsh_timeline: Option<Arc<ZshOptionTimeline>>,
736 zsh_timeline_index: usize,
737 #[cfg(feature = "benchmarking")]
738 benchmark_counters: Option<LexerBenchmarkCounters>,
739}
740
741impl<'a> Lexer<'a> {
742 pub fn new(input: &'a str) -> Self {
744 Self::with_max_subst_depth_and_profile(
745 input,
746 DEFAULT_MAX_SUBST_DEPTH,
747 &ShellProfile::native(super::ShellDialect::Bash),
748 None,
749 )
750 }
751
752 pub(super) fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
755 Self::with_max_subst_depth_and_profile(
756 input,
757 max_depth,
758 &ShellProfile::native(super::ShellDialect::Bash),
759 None,
760 )
761 }
762
763 #[cfg(test)]
765 fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
766 let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
767 .then(|| ZshOptionTimeline::build(input, shell_profile))
768 .flatten()
769 .map(Arc::new);
770 Self::with_max_subst_depth_and_profile(
771 input,
772 DEFAULT_MAX_SUBST_DEPTH,
773 shell_profile,
774 zsh_timeline,
775 )
776 }
777
778 pub(crate) fn with_max_subst_depth_and_profile(
779 input: &'a str,
780 max_depth: usize,
781 shell_profile: &ShellProfile,
782 zsh_timeline: Option<Arc<ZshOptionTimeline>>,
783 ) -> Self {
784 Self {
785 input,
786 offset: 0,
787 cursor: Cursor::new(input),
788 position_map: PositionMap::new(input),
789 reinject_buf: VecDeque::new(),
790 reinject_resume_offset: None,
791 max_subst_depth: max_depth,
792 initial_zsh_options: shell_profile.zsh_options().cloned(),
793 zsh_timeline,
794 zsh_timeline_index: 0,
795 #[cfg(feature = "benchmarking")]
796 benchmark_counters: None,
797 }
798 }
799
800 pub(super) fn position_at_offset(&self, offset: usize) -> Position {
801 self.position_map.position_uncached(offset)
802 }
803
804 fn current_position(&mut self) -> Position {
805 #[cfg(feature = "benchmarking")]
806 self.maybe_record_current_position_call();
807 self.position_map.position(self.offset)
808 }
809
810 #[cfg(feature = "benchmarking")]
811 pub(crate) fn enable_benchmark_counters(&mut self) {
812 self.benchmark_counters = Some(LexerBenchmarkCounters::default());
813 }
814
815 #[cfg(feature = "benchmarking")]
816 pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
817 self.benchmark_counters.unwrap_or_default()
818 }
819
820 #[cfg(feature = "benchmarking")]
821 fn maybe_record_current_position_call(&mut self) {
822 if let Some(counters) = &mut self.benchmark_counters {
823 counters.current_position_calls += 1;
824 }
825 }
826
827 fn sync_offset_to_cursor(&mut self) {
828 if self.reinject_buf.is_empty()
829 && let Some(offset) = self.reinject_resume_offset.take()
830 {
831 self.offset = offset;
832 }
833 }
834
835 pub fn next_token_kind(&mut self) -> Option<TokenKind> {
841 self.next_lexed_token().map(|token| token.kind)
842 }
843
844 fn peek_char(&mut self) -> Option<char> {
845 self.sync_offset_to_cursor();
846 if let Some(&ch) = self.reinject_buf.front() {
847 Some(ch)
848 } else {
849 self.cursor.first()
850 }
851 }
852
853 fn advance(&mut self) -> Option<char> {
854 self.sync_offset_to_cursor();
855 let ch = if !self.reinject_buf.is_empty() {
856 self.reinject_buf.pop_front()
857 } else {
858 self.cursor.bump()
859 };
860 if let Some(c) = ch {
861 self.offset += c.len_utf8();
862 }
863 ch
864 }
865
866 fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
867 self.reinject_buf
868 .iter()
869 .copied()
870 .chain(self.cursor.rest().chars())
871 }
872
873 fn second_char(&self) -> Option<char> {
874 match self.reinject_buf.len() {
875 0 => self.cursor.second(),
876 1 => self.cursor.first(),
877 _ => self.reinject_buf.get(1).copied(),
878 }
879 }
880
881 fn third_char(&self) -> Option<char> {
882 match self.reinject_buf.len() {
883 0 => self.cursor.third(),
884 1 => self.cursor.second(),
885 2 => self.cursor.first(),
886 _ => self.reinject_buf.get(2).copied(),
887 }
888 }
889
890 fn fourth_char(&self) -> Option<char> {
891 match self.reinject_buf.len() {
892 0 => self.cursor.rest().chars().nth(3),
893 1 => self.cursor.third(),
894 2 => self.cursor.second(),
895 3 => self.cursor.first(),
896 _ => self.reinject_buf.get(3).copied(),
897 }
898 }
899
900 fn consume_source_bytes(&mut self, byte_len: usize) {
901 debug_assert!(self.reinject_buf.is_empty());
902 self.sync_offset_to_cursor();
903 self.offset += byte_len;
904 self.cursor.skip_bytes(byte_len);
905 }
906
907 fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
908 debug_assert!(self.reinject_buf.is_empty());
909 self.offset += byte_len;
910 }
911
912 fn consume_ascii_chars(&mut self, count: usize) {
913 if self.reinject_buf.is_empty() {
914 self.consume_source_bytes(count);
915 return;
916 }
917
918 for _ in 0..count {
919 self.advance();
920 }
921 }
922
923 fn source_horizontal_whitespace_len(&self) -> usize {
924 self.cursor
925 .rest()
926 .as_bytes()
927 .iter()
928 .take_while(|byte| matches!(**byte, b' ' | b'\t'))
929 .count()
930 }
931
932 fn source_ascii_plain_word_len(&self) -> usize {
933 self.cursor
934 .rest()
935 .as_bytes()
936 .iter()
937 .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
938 .count()
939 }
940
941 fn find_double_quote_special(source: &str) -> Option<usize> {
942 source
943 .as_bytes()
944 .iter()
945 .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
946 }
947
948 fn ensure_capture_from_source(
949 &self,
950 capture: &mut Option<String>,
951 start: Position,
952 end: Position,
953 ) {
954 if capture.is_none() {
955 *capture = Some(self.input[start.offset..end.offset].to_string());
956 }
957 }
958
959 fn push_capture_char(capture: &mut Option<String>, ch: char) {
960 if let Some(text) = capture.as_mut() {
961 text.push(ch);
962 }
963 }
964
965 fn push_capture_str(capture: &mut Option<String>, text: &str) {
966 if let Some(current) = capture.as_mut() {
967 current.push_str(text);
968 }
969 }
970
971 fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
972 if let Some(timeline) = self.zsh_timeline.as_ref() {
973 while self.zsh_timeline_index < timeline.entries.len()
974 && timeline.entries[self.zsh_timeline_index].offset <= self.offset
975 {
976 self.zsh_timeline_index += 1;
977 }
978 return if self.zsh_timeline_index == 0 {
979 self.initial_zsh_options.as_ref()
980 } else {
981 Some(&timeline.entries[self.zsh_timeline_index - 1].state)
982 };
983 }
984
985 self.initial_zsh_options.as_ref()
986 }
987
988 fn comments_enabled(&mut self) -> bool {
989 !self
990 .current_zsh_options()
991 .is_some_and(|options| options.interactive_comments.is_definitely_off())
992 }
993
994 fn rc_quotes_enabled(&mut self) -> bool {
995 self.current_zsh_options()
996 .is_some_and(|options| options.rc_quotes.is_definitely_on())
997 }
998
999 fn ignore_braces_enabled(&mut self) -> bool {
1000 self.current_zsh_options()
1001 .is_some_and(|options| options.ignore_braces.is_definitely_on())
1002 }
1003
1004 fn ignore_close_braces_enabled(&mut self) -> bool {
1005 self.current_zsh_options().is_some_and(|options| {
1006 options.ignore_braces.is_definitely_on()
1007 || options.ignore_close_braces.is_definitely_on()
1008 })
1009 }
1010
1011 fn should_treat_hash_as_word_char(&mut self) -> bool {
1012 if !self.comments_enabled() {
1013 return true;
1014 }
1015 self.reinject_buf.is_empty()
1016 && (self
1017 .input
1018 .get(..self.offset)
1019 .and_then(|prefix| prefix.chars().next_back())
1020 .is_some_and(|prev| {
1021 !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1022 })
1023 || self.is_inside_unclosed_double_paren_on_line())
1024 }
1025
1026 fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1027 capture
1028 .as_deref()
1029 .unwrap_or(&self.input[start.offset..self.offset])
1030 }
1031
1032 fn current_word_surface_is_single_char(
1033 &self,
1034 start: Position,
1035 capture: &Option<String>,
1036 target: char,
1037 ) -> bool {
1038 let text = self.current_word_text(start, capture);
1039 if !text.contains('\x00') {
1040 let mut encoded = [0; 4];
1041 return text == target.encode_utf8(&mut encoded);
1042 }
1043
1044 let mut chars = text.chars().filter(|&ch| ch != '\x00');
1045 matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1046 }
1047
1048 fn current_word_surface_last_char<'b>(
1049 &'b self,
1050 start: Position,
1051 capture: &'b Option<String>,
1052 ) -> Option<char> {
1053 self.current_word_text(start, capture)
1054 .chars()
1055 .rev()
1056 .find(|&ch| ch != '\x00')
1057 }
1058
1059 fn current_word_surface_ends_with_char(
1060 &self,
1061 start: Position,
1062 capture: &Option<String>,
1063 target: char,
1064 ) -> bool {
1065 self.current_word_surface_last_char(start, capture) == Some(target)
1066 }
1067
1068 fn current_word_surface_ends_with_extglob_prefix(
1069 &self,
1070 start: Position,
1071 capture: &Option<String>,
1072 ) -> bool {
1073 self.current_word_surface_last_char(start, capture)
1074 .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1075 }
1076
1077 pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1083 self.skip_whitespace();
1084 let start = self.current_position();
1085 let token = self.next_lexed_token_inner(false)?;
1086 let end = self.current_position();
1087 Some(token.with_span(Span::from_positions(start, end)))
1088 }
1089
1090 pub(super) fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1092 self.skip_whitespace();
1093 let start = self.current_position();
1094 let token = self.next_lexed_token_inner(true)?;
1095 let end = self.current_position();
1096 Some(token.with_span(Span::from_positions(start, end)))
1097 }
1098
1099 fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
1101 let ch = self.peek_char()?;
1102
1103 match ch {
1104 '\n' => {
1105 self.consume_ascii_chars(1);
1106 Some(LexedToken::punctuation(TokenKind::Newline))
1107 }
1108 ';' => {
1109 if self.second_char() == Some(';') {
1110 if self.third_char() == Some('&') {
1111 self.consume_ascii_chars(3);
1112 Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp)) } else {
1114 self.consume_ascii_chars(2);
1115 Some(LexedToken::punctuation(TokenKind::DoubleSemicolon)) }
1117 } else if self.second_char() == Some('|') {
1118 self.consume_ascii_chars(2);
1119 Some(LexedToken::punctuation(TokenKind::SemiPipe)) } else if self.second_char() == Some('&') {
1121 self.consume_ascii_chars(2);
1122 Some(LexedToken::punctuation(TokenKind::SemiAmp)) } else {
1124 self.consume_ascii_chars(1);
1125 Some(LexedToken::punctuation(TokenKind::Semicolon))
1126 }
1127 }
1128 '|' => {
1129 if self.second_char() == Some('|') {
1130 self.consume_ascii_chars(2);
1131 Some(LexedToken::punctuation(TokenKind::Or))
1132 } else if self.second_char() == Some('&') {
1133 self.consume_ascii_chars(2);
1134 Some(LexedToken::punctuation(TokenKind::PipeBoth))
1135 } else {
1136 self.consume_ascii_chars(1);
1137 Some(LexedToken::punctuation(TokenKind::Pipe))
1138 }
1139 }
1140 '&' => {
1141 if self.second_char() == Some('&') {
1142 self.consume_ascii_chars(2);
1143 Some(LexedToken::punctuation(TokenKind::And))
1144 } else if self.second_char() == Some('>') {
1145 if self.third_char() == Some('>') {
1146 self.consume_ascii_chars(3);
1147 Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
1148 } else {
1149 self.consume_ascii_chars(2);
1150 Some(LexedToken::punctuation(TokenKind::RedirectBoth))
1151 }
1152 } else if self.second_char() == Some('|') {
1153 self.consume_ascii_chars(2);
1154 Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
1155 } else if self.second_char() == Some('!') {
1156 self.consume_ascii_chars(2);
1157 Some(LexedToken::punctuation(TokenKind::BackgroundBang))
1158 } else {
1159 self.consume_ascii_chars(1);
1160 Some(LexedToken::punctuation(TokenKind::Background))
1161 }
1162 }
1163 '>' => {
1164 if self.second_char() == Some('>') {
1165 if self.third_char() == Some('|') {
1166 self.consume_ascii_chars(3);
1167 } else {
1168 self.consume_ascii_chars(2);
1169 }
1170 Some(LexedToken::punctuation(TokenKind::RedirectAppend))
1171 } else if self.second_char() == Some('|') {
1172 self.consume_ascii_chars(2);
1173 Some(LexedToken::punctuation(TokenKind::Clobber))
1174 } else if self.second_char() == Some('(') {
1175 self.consume_ascii_chars(2);
1176 Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
1177 } else if self.second_char() == Some('&') {
1178 self.consume_ascii_chars(2);
1179 Some(LexedToken::punctuation(TokenKind::DupOutput))
1180 } else {
1181 self.consume_ascii_chars(1);
1182 Some(LexedToken::punctuation(TokenKind::RedirectOut))
1183 }
1184 }
1185 '<' => {
1186 if self.second_char() == Some('<') {
1187 if self.third_char() == Some('<') {
1188 self.consume_ascii_chars(3);
1189 Some(LexedToken::punctuation(TokenKind::HereString))
1190 } else if self.third_char() == Some('-') {
1191 self.consume_ascii_chars(3);
1192 Some(LexedToken::punctuation(TokenKind::HereDocStrip))
1193 } else {
1194 self.consume_ascii_chars(2);
1195 Some(LexedToken::punctuation(TokenKind::HereDoc))
1196 }
1197 } else if self.second_char() == Some('>') {
1198 self.consume_ascii_chars(2);
1199 Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
1200 } else if self.second_char() == Some('(') {
1201 self.consume_ascii_chars(2);
1202 Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
1203 } else if self.second_char() == Some('&') {
1204 self.consume_ascii_chars(2);
1205 Some(LexedToken::punctuation(TokenKind::DupInput))
1206 } else {
1207 self.consume_ascii_chars(1);
1208 Some(LexedToken::punctuation(TokenKind::RedirectIn))
1209 }
1210 }
1211 '(' => {
1212 if self.second_char() == Some('(') {
1213 self.consume_ascii_chars(2);
1214 Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
1215 } else {
1216 self.consume_ascii_chars(1);
1217 Some(LexedToken::punctuation(TokenKind::LeftParen))
1218 }
1219 }
1220 ')' => {
1221 if self.second_char() == Some(')') {
1222 self.consume_ascii_chars(2);
1223 Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
1224 } else {
1225 self.consume_ascii_chars(1);
1226 Some(LexedToken::punctuation(TokenKind::RightParen))
1227 }
1228 }
1229 '{' => {
1230 let start = self.current_position();
1231 if self.ignore_braces_enabled() {
1232 self.consume_ascii_chars(1);
1233 match self.peek_char() {
1234 Some(' ') | Some('\t') | Some('\n') | None => {
1235 Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
1236 }
1237 _ => self.read_word_starting_with("{", start),
1238 }
1239 } else if self.looks_like_brace_expansion() {
1240 self.read_brace_expansion_word()
1244 } else if self.is_brace_group_start() {
1245 self.advance();
1246 Some(LexedToken::punctuation(TokenKind::LeftBrace))
1247 } else if self.brace_literal_starts_case_pattern_delimiter() {
1248 self.read_word_starting_with("{", start)
1249 } else {
1250 self.read_brace_literal_word()
1251 }
1252 }
1253 '}' => {
1254 self.consume_ascii_chars(1);
1255 if self.ignore_close_braces_enabled() {
1256 Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
1257 } else {
1258 Some(LexedToken::punctuation(TokenKind::RightBrace))
1259 }
1260 }
1261 '[' => {
1262 let start = self.current_position();
1263 self.consume_ascii_chars(1);
1264 if self.peek_char() == Some('[')
1265 && matches!(
1266 self.second_char(),
1267 Some(' ') | Some('\t') | Some('\n') | None
1268 )
1269 {
1270 self.consume_ascii_chars(1);
1271 Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
1272 } else {
1273 match self.peek_char() {
1280 Some(' ') | Some('\t') | Some('\n') | None => {
1281 Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
1282 }
1283 _ => self.read_word_starting_with("[", start),
1284 }
1285 }
1286 }
1287 ']' => {
1288 if self.second_char() == Some(']') {
1289 self.consume_ascii_chars(2);
1290 Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
1291 } else {
1292 self.consume_ascii_chars(1);
1293 Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
1294 }
1295 }
1296 '\'' => self.read_single_quoted_string(),
1297 '"' => self.read_double_quoted_string(),
1298 '#' => {
1299 if self.should_treat_hash_as_word_char() {
1300 let start = self.current_position();
1301 return self.read_word_starting_with("#", start);
1302 }
1303 if preserve_comments {
1304 self.read_comment();
1305 Some(LexedToken::comment())
1306 } else {
1307 self.skip_comment();
1308 self.next_lexed_token_inner(false)
1309 }
1310 }
1311 '0'..='9' => self.read_word_or_fd_redirect(),
1313 _ => self.read_word(),
1314 }
1315 }
1316
1317 fn skip_whitespace(&mut self) {
1318 while let Some(ch) = self.peek_char() {
1319 if self.reinject_buf.is_empty() {
1320 let whitespace_len = self.source_horizontal_whitespace_len();
1321 if whitespace_len > 0 {
1322 self.consume_source_bytes(whitespace_len);
1323 continue;
1324 }
1325
1326 if self.cursor.rest().starts_with("\\\n") {
1327 self.consume_source_bytes(2);
1328 continue;
1329 }
1330 }
1331
1332 if ch == ' ' || ch == '\t' {
1333 self.consume_ascii_chars(1);
1334 } else if ch == '\\' {
1335 if self.second_char() == Some('\n') {
1337 self.consume_ascii_chars(2);
1338 } else {
1339 break;
1340 }
1341 } else {
1342 break;
1343 }
1344 }
1345 }
1346
1347 fn skip_comment(&mut self) {
1348 if self.reinject_buf.is_empty() {
1349 let end = self
1350 .cursor
1351 .find_byte(b'\n')
1352 .unwrap_or(self.cursor.rest().len());
1353 self.consume_source_bytes(end);
1354 return;
1355 }
1356
1357 while let Some(ch) = self.peek_char() {
1358 if ch == '\n' {
1359 break;
1360 }
1361 self.advance();
1362 }
1363 }
1364
1365 fn read_comment(&mut self) {
1366 debug_assert_eq!(self.peek_char(), Some('#'));
1367
1368 if self.reinject_buf.is_empty() {
1369 let rest = self.cursor.rest();
1370 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1371 self.consume_source_bytes(end);
1372 return;
1373 }
1374
1375 self.advance(); while let Some(ch) = self.peek_char() {
1378 if ch == '\n' {
1379 break;
1380 }
1381 self.advance();
1382 }
1383 }
1384
1385 fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1386 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1387 return false;
1388 }
1389
1390 let line_start = self.input[..self.offset]
1391 .rfind('\n')
1392 .map_or(0, |index| index + 1);
1393 let prefix = &self.input[line_start..self.offset];
1394 line_has_unclosed_double_paren(prefix)
1395 }
1396
1397 fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
1400 if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
1401 let Some(fd) = first_digit.to_digit(10) else {
1402 unreachable!("peeked ASCII digit should convert to a base-10 digit");
1403 };
1404 let fd = fd as i32;
1405
1406 match (self.second_char(), self.third_char()) {
1407 (Some('>'), Some('>')) => {
1408 if self.fourth_char() == Some('|') {
1409 self.consume_ascii_chars(4);
1410 } else {
1411 self.consume_ascii_chars(3);
1412 }
1413 return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
1414 }
1415 (Some('>'), Some('|')) => {
1416 self.consume_ascii_chars(3);
1417 return Some(LexedToken::fd(TokenKind::Clobber, fd));
1418 }
1419 (Some('>'), Some('&')) => {
1420 self.consume_ascii_chars(3);
1421
1422 let mut target_str = String::with_capacity(4);
1423 while let Some(c) = self.peek_char() {
1424 if c.is_ascii_digit() {
1425 target_str.push(c);
1426 self.advance();
1427 } else {
1428 break;
1429 }
1430 }
1431
1432 if target_str.is_empty() {
1433 return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1434 }
1435
1436 let target_fd: i32 = target_str.parse().unwrap_or(1);
1437 return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
1438 }
1439 (Some('>'), _) => {
1440 self.consume_ascii_chars(2);
1441 return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1442 }
1443 (Some('<'), Some('&')) => {
1444 self.consume_ascii_chars(3);
1445
1446 let mut target_str = String::with_capacity(4);
1447 while let Some(c) = self.peek_char() {
1448 if c.is_ascii_digit() || c == '-' {
1449 target_str.push(c);
1450 self.advance();
1451 if c == '-' {
1452 break;
1453 }
1454 } else {
1455 break;
1456 }
1457 }
1458
1459 if target_str == "-" {
1460 return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
1461 }
1462 let target_fd: i32 = target_str.parse().unwrap_or(0);
1463 return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
1464 }
1465 (Some('<'), Some('>')) => {
1466 self.consume_ascii_chars(3);
1467 return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
1468 }
1469 (Some('<'), Some('<')) => {}
1470 (Some('<'), _) => {
1471 self.consume_ascii_chars(2);
1472 return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
1473 }
1474 _ => {}
1475 }
1476 }
1477
1478 self.read_word()
1480 }
1481
1482 fn read_word_starting_with(
1483 &mut self,
1484 _prefix: &str,
1485 start: Position,
1486 ) -> Option<LexedToken<'a>> {
1487 let segment = match self.read_unquoted_segment(start) {
1488 Ok(segment) => segment,
1489 Err(kind) => return Some(LexedToken::error(kind)),
1490 };
1491 if segment.as_str().is_empty() {
1492 return None;
1493 }
1494 let mut lexed_word = LexedWord::from_segment(segment);
1495 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1496 return Some(LexedToken::error(kind));
1497 }
1498 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1499 }
1500
1501 fn read_word(&mut self) -> Option<LexedToken<'a>> {
1502 let start = self.current_position();
1503
1504 if self.reinject_buf.is_empty() {
1505 let ascii_len = self.source_ascii_plain_word_len();
1506 let chunk = if ascii_len > 0
1507 && self
1508 .cursor
1509 .rest()
1510 .as_bytes()
1511 .get(ascii_len)
1512 .is_none_or(|byte| byte.is_ascii())
1513 {
1514 self.consume_source_bytes(ascii_len);
1515 &self.input[start.offset..self.offset]
1516 } else {
1517 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1518 self.advance_scanned_source_bytes(chunk.len());
1519 chunk
1520 };
1521 if !chunk.is_empty() {
1522 let continues = matches!(
1523 self.peek_char(),
1524 Some(next)
1525 if Self::is_word_char(next)
1526 || next == '$'
1527 || matches!(next, '\'' | '"')
1528 || next == '{'
1529 || (next == '\\' && self.second_char() == Some('\n'))
1530 || (next == '('
1531 && (chunk.ends_with('=')
1532 || Self::word_can_take_parenthesized_suffix(chunk)))
1533 );
1534
1535 if !continues {
1536 let end = self.current_position();
1537 return Some(LexedToken::borrowed_word(
1538 TokenKind::Word,
1539 &self.input[start.offset..self.offset],
1540 Some(Span::from_positions(start, end)),
1541 ));
1542 }
1543
1544 if self.peek_char() == Some('(')
1545 && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
1546 {
1547 return self.read_complex_word(start);
1548 }
1549
1550 let end = self.current_position();
1551 return self.finish_segmented_word(LexedWord::borrowed(
1552 LexedWordSegmentKind::Plain,
1553 &self.input[start.offset..self.offset],
1554 Some(Span::from_positions(start, end)),
1555 ));
1556 }
1557 }
1558
1559 self.read_complex_word(start)
1560 }
1561
1562 fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1563 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1564 return Some(LexedToken::error(kind));
1565 }
1566
1567 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1568 }
1569
1570 fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1571 if self.peek_char() == Some('$') {
1572 match self.second_char() {
1573 Some('\'') => return self.read_dollar_single_quoted_string(),
1574 Some('"') => return self.read_dollar_double_quoted_string(),
1575 _ => {}
1576 }
1577 }
1578
1579 let segment = match self.read_unquoted_segment(start) {
1580 Ok(segment) => segment,
1581 Err(kind) => return Some(LexedToken::error(kind)),
1582 };
1583
1584 if segment.as_str().is_empty() {
1585 return None;
1586 }
1587
1588 self.finish_segmented_word(LexedWord::from_segment(segment))
1589 }
1590
/// Reads one unquoted (`Plain`) word segment whose text begins at `start`.
///
/// The loop stops at a quote character, at `$'`/`$"` once the segment is
/// non-empty, or at any character that none of the branches below accept.
///
/// `word` is an optional capture buffer: it starts as `Some` only when the
/// re-injection buffer is non-empty. If it is still `None` at the end, the
/// segment borrows `self.input[start.offset..self.offset]`; otherwise the
/// captured text is returned as an owned segment.
/// NOTE(review): `ensure_capture_from_source` presumably switches `word` to
/// `Some` by copying the source text read so far — confirm against its
/// definition.
///
/// # Errors
///
/// * `LexerErrorKind::CommandSubstitution` when reading `$[...]`, `$((...))`
///   or `$(...)` reports failure.
/// * `LexerErrorKind::BacktickSubstitution` when a backtick is never closed.
fn read_unquoted_segment(
    &mut self,
    start: Position,
) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
    let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
    while let Some(ch) = self.peek_char() {
        if ch == '"' || ch == '\'' {
            // Quotes are not part of an unquoted segment.
            break;
        } else if ch == '$' {
            // `$'` / `$"` ends the segment only when something was already
            // read; at the very start the `$` is processed below.
            if matches!(self.second_char(), Some('\'') | Some('"'))
                && (self.current_position().offset > start.offset
                    || word.as_ref().is_some_and(|word| !word.is_empty()))
            {
                break;
            }

            self.advance();

            Self::push_capture_char(&mut word, ch);
            if self.peek_char() == Some('[') {
                // `$[`: handled by `read_legacy_arithmetic_into`.
                Self::push_capture_char(&mut word, '[');
                self.advance();
                if !self.read_legacy_arithmetic_into(&mut word, start) {
                    return Err(LexerErrorKind::CommandSubstitution);
                }
            } else if self.peek_char() == Some('(') {
                if self.second_char() == Some('(') {
                    // `$((`: the helper consumes both parentheses itself.
                    if !self.read_arithmetic_expansion_into(&mut word) {
                        return Err(LexerErrorKind::CommandSubstitution);
                    }
                } else {
                    // `$(`: handled by `read_command_subst_into`.
                    Self::push_capture_char(&mut word, '(');
                    self.advance();
                    if !self.read_command_subst_into(&mut word) {
                        return Err(LexerErrorKind::CommandSubstitution);
                    }
                }
            } else if self.peek_char() == Some('{') {
                // `${`: the helper's return value is deliberately ignored here.
                Self::push_capture_char(&mut word, '{');
                self.advance();
                let _ = self.read_param_expansion_into(&mut word, start);
            } else {
                if let Some(c) = self.peek_char() {
                    // Either exactly one of `? # @ * ! $ -` or a single digit
                    // follows the `$`, or a run of `[A-Za-z0-9_]` does.
                    if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
                        || c.is_ascii_digit()
                    {
                        Self::push_capture_char(&mut word, c);
                        self.advance();
                    } else {
                        while let Some(c) = self.peek_char() {
                            if c.is_ascii_alphanumeric() || c == '_' {
                                Self::push_capture_char(&mut word, c);
                                self.advance();
                            } else {
                                break;
                            }
                        }
                    }
                }
            }
        } else if ch == '{' {
            if self.looks_like_mid_word_brace_segment() {
                // Take the `{` and let the helper consume the brace segment.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                self.consume_mid_word_brace_segment(&mut word);
            } else {
                // A lone `{` is kept as an ordinary character.
                Self::push_capture_char(&mut word, ch);
                self.advance();
            }
        } else if ch == '`' {
            let capture_end = self.current_position();
            self.ensure_capture_from_source(&mut word, start, capture_end);
            Self::push_capture_char(&mut word, ch);
            self.advance();
            let mut closed = false;
            // Copy everything through the closing backtick; the character
            // after a backslash is copied without being inspected.
            while let Some(c) = self.peek_char() {
                Self::push_capture_char(&mut word, c);
                self.advance();
                if c == '`' {
                    closed = true;
                    break;
                }
                if c == '\\'
                    && let Some(next) = self.peek_char()
                {
                    Self::push_capture_char(&mut word, next);
                    self.advance();
                }
            }
            if !closed {
                return Err(LexerErrorKind::BacktickSubstitution);
            }
        } else if ch == '\\' {
            let capture_end = self.current_position();
            self.ensure_capture_from_source(&mut word, start, capture_end);
            self.advance();
            if let Some(next) = self.peek_char() {
                if next == '\n' {
                    // Backslash-newline: both characters are dropped.
                    self.advance();
                } else {
                    // A NUL is pushed in front of the escaped character.
                    // NOTE(review): presumably a marker for later stages that
                    // the character was escaped — confirm with the consumer.
                    Self::push_capture_char(&mut word, '\x00');
                    Self::push_capture_char(&mut word, next);
                    self.advance();
                    if next == '{'
                        && self.current_word_surface_is_single_char(start, &word, '{')
                        && self.escaped_brace_sequence_looks_like_brace_expansion()
                    {
                        // Copy through the `}` that balances the escaped `{`.
                        let mut depth = 1;
                        while let Some(c) = self.peek_char() {
                            Self::push_capture_char(&mut word, c);
                            self.advance();
                            match c {
                                '{' => depth += 1,
                                '}' => {
                                    depth -= 1;
                                    if depth == 0 {
                                        break;
                                    }
                                }
                                _ => {}
                            }
                        }
                    }
                }
            } else {
                // Backslash at end of input is kept literally.
                Self::push_capture_char(&mut word, '\\');
            }
        } else if ch == '('
            && self.current_word_surface_ends_with_char(start, &word, '=')
            && self.looks_like_assoc_assign()
        {
            // `=(` accepted by `looks_like_assoc_assign`: copy through the
            // balancing `)`, skipping over quoted runs and escaped characters.
            Self::push_capture_char(&mut word, ch);
            self.advance();
            let mut depth = 1;
            while let Some(c) = self.peek_char() {
                Self::push_capture_char(&mut word, c);
                self.advance();
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            break;
                        }
                    }
                    '"' => {
                        // Double-quoted run; a backslash protects the next char.
                        while let Some(qc) = self.peek_char() {
                            Self::push_capture_char(&mut word, qc);
                            self.advance();
                            if qc == '"' {
                                break;
                            }
                            if qc == '\\'
                                && let Some(esc) = self.peek_char()
                            {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                    }
                    '\'' => {
                        // Single-quoted run; ends at the next `'`.
                        while let Some(qc) = self.peek_char() {
                            Self::push_capture_char(&mut word, qc);
                            self.advance();
                            if qc == '\'' {
                                break;
                            }
                        }
                    }
                    '\\' => {
                        if let Some(esc) = self.peek_char() {
                            Self::push_capture_char(&mut word, esc);
                            self.advance();
                        }
                    }
                    _ => {}
                }
            }
        } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
        {
            // `(` after an extglob prefix: copy through the balancing `)`.
            Self::push_capture_char(&mut word, ch);
            self.advance();
            let mut depth = 1;
            while let Some(c) = self.peek_char() {
                Self::push_capture_char(&mut word, c);
                self.advance();
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            break;
                        }
                    }
                    '\\' => {
                        if let Some(esc) = self.peek_char() {
                            Self::push_capture_char(&mut word, esc);
                            self.advance();
                        }
                    }
                    _ => {}
                }
            }
        } else if Self::is_plain_word_char(ch) {
            if self.reinject_buf.is_empty() {
                // Bulk-consume a run of plain characters from the source.
                let ascii_len = self.source_ascii_plain_word_len();
                let chunk = if ascii_len > 0
                    && self
                        .cursor
                        .rest()
                        .as_bytes()
                        .get(ascii_len)
                        .is_none_or(|byte| byte.is_ascii())
                {
                    self.consume_source_bytes(ascii_len);
                    &self.input[self.offset - ascii_len..self.offset]
                } else {
                    let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                    self.advance_scanned_source_bytes(chunk.len());
                    chunk
                };
                Self::push_capture_str(&mut word, chunk);
            } else {
                Self::push_capture_char(&mut word, ch);
                self.advance();
            }
        } else {
            // Anything else terminates the segment.
            break;
        }
    }

    if let Some(word) = word {
        // Captured text: owned segment, same span for content and wrapper.
        let span = Some(Span::from_positions(start, self.current_position()));
        Ok(LexedWordSegment::owned_with_spans(
            LexedWordSegmentKind::Plain,
            word,
            span,
            span,
        ))
    } else {
        // Nothing was captured: borrow the consumed source range.
        let end = self.current_position();
        Ok(LexedWordSegment::borrowed(
            LexedWordSegmentKind::Plain,
            &self.input[start.offset..self.offset],
            Some(Span::from_positions(start, end)),
        ))
    }
}
1861
1862 fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1863 let segment = match self.read_single_quoted_segment() {
1864 Ok(segment) => segment,
1865 Err(kind) => return Some(LexedToken::error(kind)),
1866 };
1867 let mut word = LexedWord::from_segment(segment);
1868 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1869 return Some(LexedToken::error(kind));
1870 }
1871
1872 Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1873 }
1874
1875 fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1876 debug_assert_eq!(self.peek_char(), Some('\''));
1877
1878 let wrapper_start = self.current_position();
1879 self.consume_ascii_chars(1); let content_start = self.current_position();
1881 let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
1882 let mut content_end = content_start;
1883 let mut content = String::with_capacity(16);
1884 let mut closed = false;
1885
1886 if can_borrow {
1887 let rest = self.cursor.rest();
1888 if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
1889 self.consume_source_bytes(quote_index);
1890 content_end = self.current_position();
1891 self.consume_ascii_chars(1); closed = true;
1893 } else {
1894 self.consume_source_bytes(rest.len());
1895 }
1896 }
1897
1898 while let Some(ch) = self.peek_char() {
1899 if closed {
1900 break;
1901 }
1902 if ch == '\'' {
1903 if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
1904 if !can_borrow {
1905 content.push('\'');
1906 }
1907 self.advance();
1908 self.advance();
1909 continue;
1910 }
1911 content_end = self.current_position();
1912 self.consume_ascii_chars(1); closed = true;
1914 break;
1915 }
1916 if !can_borrow {
1917 content.push(ch);
1918 }
1919 self.advance();
1920 }
1921
1922 if !closed {
1923 return Err(LexerErrorKind::SingleQuote);
1924 }
1925
1926 let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
1927 let content_span = Some(Span::from_positions(content_start, content_end));
1928
1929 if can_borrow {
1930 Ok(LexedWordSegment::borrowed_with_spans(
1931 LexedWordSegmentKind::SingleQuoted,
1932 &self.input[content_start.offset..content_end.offset],
1933 content_span,
1934 wrapper_span,
1935 ))
1936 } else {
1937 Ok(LexedWordSegment::owned_with_spans(
1938 LexedWordSegmentKind::SingleQuoted,
1939 content,
1940 content_span,
1941 wrapper_span,
1942 ))
1943 }
1944 }
1945
1946 fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1947 let segment = match self.read_dollar_single_quoted_segment() {
1948 Ok(segment) => segment,
1949 Err(kind) => return Some(LexedToken::error(kind)),
1950 };
1951 let mut word = LexedWord::from_segment(segment);
1952 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1953 return Some(LexedToken::error(kind));
1954 }
1955
1956 let kind = if word.single_segment().is_some() {
1957 TokenKind::LiteralWord
1958 } else {
1959 TokenKind::Word
1960 };
1961
1962 Some(LexedToken::with_word_payload(kind, word))
1963 }
1964
1965 fn read_dollar_single_quoted_segment(
1966 &mut self,
1967 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1968 debug_assert_eq!(self.peek_char(), Some('$'));
1969 debug_assert_eq!(self.second_char(), Some('\''));
1970
1971 let wrapper_start = self.current_position();
1972 self.consume_ascii_chars(2); let content_start = self.current_position();
1974 let mut out = String::with_capacity(16);
1975
1976 while let Some(ch) = self.peek_char() {
1977 if ch == '\'' {
1978 let content_end = self.current_position();
1979 self.advance();
1980 let wrapper_span =
1981 Some(Span::from_positions(wrapper_start, self.current_position()));
1982 let content_span = Some(Span::from_positions(content_start, content_end));
1983 return Ok(LexedWordSegment::owned_with_spans(
1984 LexedWordSegmentKind::DollarSingleQuoted,
1985 out,
1986 content_span,
1987 wrapper_span,
1988 ));
1989 }
1990
1991 if ch == '\\' {
1992 self.advance();
1993 if let Some(esc) = self.peek_char() {
1994 self.advance();
1995 match esc {
1996 'n' => out.push('\n'),
1997 't' => out.push('\t'),
1998 'r' => out.push('\r'),
1999 'a' => out.push('\x07'),
2000 'b' => out.push('\x08'),
2001 'f' => out.push('\x0C'),
2002 'v' => out.push('\x0B'),
2003 'e' | 'E' => out.push('\x1B'),
2004 '\\' => out.push('\\'),
2005 '\'' => out.push('\''),
2006 '"' => out.push('"'),
2007 '?' => out.push('?'),
2008 'c' => {
2009 if let Some(control) = self.peek_char() {
2010 self.advance();
2011 out.push(((control as u32 & 0x1F) as u8) as char);
2012 } else {
2013 out.push('\\');
2014 out.push('c');
2015 }
2016 }
2017 'x' => {
2018 let mut hex = String::new();
2019 for _ in 0..2 {
2020 if let Some(h) = self.peek_char() {
2021 if h.is_ascii_hexdigit() {
2022 hex.push(h);
2023 self.advance();
2024 } else {
2025 break;
2026 }
2027 }
2028 }
2029 if let Ok(val) = u8::from_str_radix(&hex, 16) {
2030 out.push(val as char);
2031 }
2032 }
2033 'u' => {
2034 let mut hex = String::new();
2035 for _ in 0..4 {
2036 if let Some(h) = self.peek_char() {
2037 if h.is_ascii_hexdigit() {
2038 hex.push(h);
2039 self.advance();
2040 } else {
2041 break;
2042 }
2043 }
2044 }
2045 if let Ok(val) = u32::from_str_radix(&hex, 16)
2046 && let Some(c) = char::from_u32(val)
2047 {
2048 out.push(c);
2049 }
2050 }
2051 'U' => {
2052 let mut hex = String::new();
2053 for _ in 0..8 {
2054 if let Some(h) = self.peek_char() {
2055 if h.is_ascii_hexdigit() {
2056 hex.push(h);
2057 self.advance();
2058 } else {
2059 break;
2060 }
2061 }
2062 }
2063 if let Ok(val) = u32::from_str_radix(&hex, 16)
2064 && let Some(c) = char::from_u32(val)
2065 {
2066 out.push(c);
2067 }
2068 }
2069 '0'..='7' => {
2070 let mut oct = String::new();
2071 oct.push(esc);
2072 for _ in 0..2 {
2073 if let Some(o) = self.peek_char() {
2074 if o.is_ascii_digit() && o < '8' {
2075 oct.push(o);
2076 self.advance();
2077 } else {
2078 break;
2079 }
2080 }
2081 }
2082 if let Ok(val) = u8::from_str_radix(&oct, 8) {
2083 out.push(val as char);
2084 }
2085 }
2086 _ => {
2087 out.push('\\');
2088 out.push(esc);
2089 }
2090 }
2091 } else {
2092 out.push('\\');
2093 }
2094 continue;
2095 }
2096
2097 out.push(ch);
2098 self.advance();
2099 }
2100
2101 Err(LexerErrorKind::SingleQuote)
2102 }
2103
2104 fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2105 let start = self.current_position();
2106
2107 if self.reinject_buf.is_empty() {
2108 let ascii_len = self.source_ascii_plain_word_len();
2109 let chunk = if ascii_len > 0
2110 && self
2111 .cursor
2112 .rest()
2113 .as_bytes()
2114 .get(ascii_len)
2115 .is_none_or(|byte| byte.is_ascii())
2116 {
2117 self.consume_source_bytes(ascii_len);
2118 &self.input[start.offset..self.offset]
2119 } else {
2120 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2121 self.advance_scanned_source_bytes(chunk.len());
2122 chunk
2123 };
2124 if chunk.is_empty() {
2125 return None;
2126 }
2127
2128 let end = self.current_position();
2129 return Some(LexedWordSegment::borrowed(
2130 LexedWordSegmentKind::Plain,
2131 &self.input[start.offset..self.offset],
2132 Some(Span::from_positions(start, end)),
2133 ));
2134 }
2135
2136 let ch = self.peek_char()?;
2137 if !Self::is_plain_word_char(ch) {
2138 return None;
2139 }
2140
2141 let mut text = String::with_capacity(16);
2142 while let Some(ch) = self.peek_char() {
2143 if !Self::is_plain_word_char(ch) {
2144 break;
2145 }
2146 text.push(ch);
2147 self.advance();
2148 }
2149
2150 Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2151 }
2152
2153 fn append_segmented_continuation(
2156 &mut self,
2157 word: &mut LexedWord<'a>,
2158 ) -> Result<(), LexerErrorKind> {
2159 loop {
2160 match self.peek_char() {
2161 Some('\\') if self.second_char() == Some('\n') => {
2162 self.advance();
2163 self.advance();
2164 continue;
2165 }
2166 Some('\'') => {
2167 word.push_segment(self.read_single_quoted_segment()?);
2168 }
2169 Some('"') => {
2170 word.push_segment(self.read_double_quoted_segment()?);
2171 }
2172 Some('$') if self.second_char() == Some('\'') => {
2173 word.push_segment(self.read_dollar_single_quoted_segment()?);
2174 }
2175 Some('$') if self.second_char() == Some('"') => {
2176 word.push_segment(self.read_dollar_double_quoted_segment()?);
2177 }
2178 Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2179 let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2180 unreachable!("peeked '(' should produce a suffix segment");
2181 };
2182 word.push_segment(segment);
2183 }
2184 _ => {
2185 if let Some(segment) = self.read_plain_continuation_segment() {
2186 word.push_segment(segment);
2187 continue;
2188 }
2189
2190 let start = self.current_position();
2191 let plain = self.read_unquoted_segment(start)?;
2192 if plain.as_str().is_empty() {
2193 break;
2194 }
2195 word.push_segment(plain);
2196 }
2197 }
2198 }
2199
2200 Ok(())
2201 }
2202
2203 fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2204 debug_assert_eq!(self.peek_char(), Some('('));
2205
2206 let start = self.current_position();
2207 let mut depth = 0usize;
2208 let mut escaped = false;
2209 let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2210
2211 while let Some(ch) = self.peek_char() {
2212 if let Some(text) = text.as_mut() {
2213 text.push(ch);
2214 }
2215 self.advance();
2216
2217 if escaped {
2218 escaped = false;
2219 continue;
2220 }
2221
2222 match ch {
2223 '\\' => escaped = true,
2224 '(' => depth += 1,
2225 ')' => {
2226 depth = depth.saturating_sub(1);
2227 if depth == 0 {
2228 break;
2229 }
2230 }
2231 _ => {}
2232 }
2233 }
2234
2235 let end = self.current_position();
2236 let span = Some(Span::from_positions(start, end));
2237 if let Some(text) = text {
2238 Some(LexedWordSegment::owned_with_spans(
2239 LexedWordSegmentKind::Plain,
2240 text,
2241 span,
2242 span,
2243 ))
2244 } else {
2245 Some(LexedWordSegment::borrowed_with_spans(
2246 LexedWordSegmentKind::Plain,
2247 &self.input[start.offset..end.offset],
2248 span,
2249 span,
2250 ))
2251 }
2252 }
2253
2254 fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2255 self.read_double_quoted_word(false)
2256 }
2257
2258 fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
2259 self.read_double_quoted_word(true)
2260 }
2261
2262 fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2263 let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2264 Ok(segment) => segment,
2265 Err(kind) => return Some(LexedToken::error(kind)),
2266 };
2267 let mut word = LexedWord::from_segment(segment);
2268 if let Err(kind) = self.append_segmented_continuation(&mut word) {
2269 return Some(LexedToken::error(kind));
2270 }
2271
2272 let kind = if word.single_segment().is_some() {
2273 TokenKind::QuotedWord
2274 } else {
2275 TokenKind::Word
2276 };
2277
2278 Some(LexedToken::with_word_payload(kind, word))
2279 }
2280
2281 fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2282 self.read_double_quoted_segment_with_dollar(false)
2283 }
2284
2285 fn read_dollar_double_quoted_segment(
2286 &mut self,
2287 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
2288 self.read_double_quoted_segment_with_dollar(true)
2289 }
2290
/// Reads a `"..."` segment, or a `$"..."` segment when `dollar` is true,
/// starting at the opening delimiter.
///
/// State used by the loop:
/// * `simple` — true while lexing from the source and no `\`, `$` or
///   backtick has been seen; in this mode runs of ordinary text are skipped
///   in bulk via `find_double_quote_special`.
/// * `borrowable` — true while the content can still be returned as a slice
///   of `self.input`; cleared by backticks, by the escapes `\<newline>`,
///   `\$`, `\"`, `\\` and `` \` ``, and by `read_param_expansion_into`
///   returning false.
/// * `content` — optional capture buffer, `Some` from the start only when
///   re-injected text is being read.
///   NOTE(review): `ensure_capture_from_source` presumably fills it with the
///   source text read so far — confirm against its definition.
///
/// The wrapper span covers the delimiters; the content span runs from just
/// after the opening quote to `content_end`.
///
/// # Errors
///
/// Returns `LexerErrorKind::DoubleQuote` when the closing quote is missing.
fn read_double_quoted_segment_with_dollar(
    &mut self,
    dollar: bool,
) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
    if dollar {
        debug_assert_eq!(self.peek_char(), Some('$'));
        debug_assert_eq!(self.second_char(), Some('"'));
    } else {
        debug_assert_eq!(self.peek_char(), Some('"'));
    }

    let wrapper_start = self.current_position();
    // Step over the opening delimiter: `$"` or `"`.
    if dollar {
        self.consume_ascii_chars(2);
    } else {
        self.consume_ascii_chars(1);
    }
    let content_start = self.current_position();
    let mut content_end = content_start;
    let mut simple = self.reinject_buf.is_empty();
    let mut borrowable = self.reinject_buf.is_empty();
    let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
    let mut closed = false;

    while let Some(ch) = self.peek_char() {
        if simple {
            if self.reinject_buf.is_empty() {
                let rest = self.cursor.rest();
                match Self::find_double_quote_special(rest) {
                    // Ordinary text precedes the next special character: skip
                    // it in one step and look again.
                    Some(index) if index > 0 => {
                        self.consume_source_bytes(index);
                        continue;
                    }
                    // No special character remains, so the string cannot be
                    // closed.
                    None => {
                        self.consume_source_bytes(rest.len());
                        return Err(LexerErrorKind::DoubleQuote);
                    }
                    _ => {}
                }
            }

            match ch {
                '"' => {
                    content_end = self.current_position();
                    self.consume_ascii_chars(1);
                    closed = true;
                    break;
                }
                '\\' | '$' | '`' => {
                    // Leave simple mode without consuming; the general match
                    // below handles this same character.
                    simple = false;
                    if ch == '`' {
                        borrowable = false;
                        let capture_end = self.current_position();
                        self.ensure_capture_from_source(
                            &mut content,
                            content_start,
                            capture_end,
                        );
                    }
                }
                _ => {
                    self.advance();
                }
            }
            if simple {
                continue;
            }
        }

        match ch {
            '"' => {
                // When not borrowable, `content_end` keeps the value recorded
                // by the last piece of content that updated it.
                if borrowable {
                    content_end = self.current_position();
                }
                self.consume_ascii_chars(1);
                closed = true;
                break;
            }
            '\\' => {
                let escape_start = self.current_position();
                self.advance();
                if let Some(next) = self.peek_char() {
                    match next {
                        '\n' => {
                            // Backslash-newline: both characters are dropped
                            // from the captured content.
                            borrowable = false;
                            self.ensure_capture_from_source(
                                &mut content,
                                content_start,
                                escape_start,
                            );
                            self.advance();
                        }
                        '$' => {
                            // `\$`: captured as NUL followed by `$`.
                            // NOTE(review): the NUL is presumably an
                            // escaped-character marker for later stages —
                            // confirm with the consumer.
                            borrowable = false;
                            self.ensure_capture_from_source(
                                &mut content,
                                content_start,
                                escape_start,
                            );
                            Self::push_capture_char(&mut content, '\x00');
                            Self::push_capture_char(&mut content, '$');
                            self.advance();
                        }
                        '"' | '\\' | '`' => {
                            // The backslash is dropped; `\\` and `` \` `` are
                            // additionally prefixed with NUL, `\"` is not.
                            borrowable = false;
                            self.ensure_capture_from_source(
                                &mut content,
                                content_start,
                                escape_start,
                            );
                            if next == '\\' {
                                Self::push_capture_char(&mut content, '\x00');
                            }
                            if next == '`' {
                                Self::push_capture_char(&mut content, '\x00');
                            }
                            Self::push_capture_char(&mut content, next);
                            self.advance();
                            content_end = self.current_position();
                        }
                        _ => {
                            // Any other escape keeps both the backslash and
                            // the character.
                            Self::push_capture_char(&mut content, '\\');
                            Self::push_capture_char(&mut content, next);
                            self.advance();
                            content_end = self.current_position();
                        }
                    }
                }
            }
            '$' => {
                Self::push_capture_char(&mut content, '$');
                self.advance();
                if self.peek_char() == Some('(') {
                    // The boolean results of both helpers are ignored here.
                    if self.second_char() == Some('(') {
                        self.read_arithmetic_expansion_into(&mut content);
                    } else {
                        Self::push_capture_char(&mut content, '(');
                        self.advance();
                        self.read_command_subst_into(&mut content);
                    }
                } else if self.peek_char() == Some('{') {
                    Self::push_capture_char(&mut content, '{');
                    self.advance();
                    // A false result makes the segment non-borrowable.
                    borrowable &= self.read_param_expansion_into(&mut content, content_start);
                }
                content_end = self.current_position();
            }
            '`' => {
                borrowable = false;
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut content, content_start, capture_end);
                Self::push_capture_char(&mut content, '`');
                self.advance();
                // Copy through the closing backtick; the character after a
                // backslash is copied without being inspected. Reaching the
                // end of input here simply ends the loop.
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut content, c);
                    self.advance();
                    if c == '`' {
                        break;
                    }
                    if c == '\\'
                        && let Some(next) = self.peek_char()
                    {
                        Self::push_capture_char(&mut content, next);
                        self.advance();
                    }
                }
                content_end = self.current_position();
            }
            _ => {
                Self::push_capture_char(&mut content, ch);
                self.advance();
                content_end = self.current_position();
            }
        }
    }

    if !closed {
        return Err(LexerErrorKind::DoubleQuote);
    }

    let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
    let content_span = Some(Span::from_positions(content_start, content_end));

    if borrowable {
        Ok(LexedWordSegment::borrowed_with_spans(
            if dollar {
                LexedWordSegmentKind::DollarDoubleQuoted
            } else {
                LexedWordSegmentKind::DoubleQuoted
            },
            &self.input[content_start.offset..content_end.offset],
            content_span,
            wrapper_span,
        ))
    } else {
        // An absent capture buffer yields an empty owned string.
        Ok(LexedWordSegment::owned_with_spans(
            if dollar {
                LexedWordSegmentKind::DollarDoubleQuoted
            } else {
                LexedWordSegmentKind::DoubleQuoted
            },
            content.unwrap_or_default(),
            content_span,
            wrapper_span,
        ))
    }
}
2498
2499 fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2500 debug_assert_eq!(self.peek_char(), Some('('));
2501 debug_assert_eq!(self.second_char(), Some('('));
2502
2503 Self::push_capture_char(content, '(');
2504 self.advance();
2505 Self::push_capture_char(content, '(');
2506 self.advance();
2507
2508 let mut depth = 2;
2509 while let Some(c) = self.peek_char() {
2510 match c {
2511 '\\' => {
2512 Self::push_capture_char(content, c);
2513 self.advance();
2514 if let Some(next) = self.peek_char() {
2515 Self::push_capture_char(content, next);
2516 self.advance();
2517 }
2518 }
2519 '\'' => {
2520 Self::push_capture_char(content, c);
2521 self.advance();
2522 while let Some(quoted) = self.peek_char() {
2523 Self::push_capture_char(content, quoted);
2524 self.advance();
2525 if quoted == '\'' {
2526 break;
2527 }
2528 }
2529 }
2530 '"' => {
2531 let mut escaped = false;
2532 Self::push_capture_char(content, c);
2533 self.advance();
2534 while let Some(quoted) = self.peek_char() {
2535 Self::push_capture_char(content, quoted);
2536 self.advance();
2537 if escaped {
2538 escaped = false;
2539 continue;
2540 }
2541 match quoted {
2542 '\\' => escaped = true,
2543 '"' => break,
2544 _ => {}
2545 }
2546 }
2547 }
2548 '`' => {
2549 let mut escaped = false;
2550 Self::push_capture_char(content, c);
2551 self.advance();
2552 while let Some(quoted) = self.peek_char() {
2553 Self::push_capture_char(content, quoted);
2554 self.advance();
2555 if escaped {
2556 escaped = false;
2557 continue;
2558 }
2559 match quoted {
2560 '\\' => escaped = true,
2561 '`' => break,
2562 _ => {}
2563 }
2564 }
2565 }
2566 '(' => {
2567 Self::push_capture_char(content, c);
2568 self.advance();
2569 depth += 1;
2570 }
2571 ')' => {
2572 Self::push_capture_char(content, c);
2573 self.advance();
2574 depth -= 1;
2575 if depth == 0 {
2576 return true;
2577 }
2578 }
2579 _ => {
2580 Self::push_capture_char(content, c);
2581 self.advance();
2582 }
2583 }
2584 }
2585
2586 false
2587 }
2588
2589 fn read_legacy_arithmetic_into(
2590 &mut self,
2591 content: &mut Option<String>,
2592 segment_start: Position,
2593 ) -> bool {
2594 let mut bracket_depth = 1;
2595
2596 while let Some(c) = self.peek_char() {
2597 match c {
2598 '\\' => {
2599 Self::push_capture_char(content, c);
2600 self.advance();
2601 if let Some(next) = self.peek_char() {
2602 Self::push_capture_char(content, next);
2603 self.advance();
2604 }
2605 }
2606 '\'' => {
2607 Self::push_capture_char(content, c);
2608 self.advance();
2609 while let Some(quoted) = self.peek_char() {
2610 Self::push_capture_char(content, quoted);
2611 self.advance();
2612 if quoted == '\'' {
2613 break;
2614 }
2615 }
2616 }
2617 '"' => {
2618 let mut escaped = false;
2619 Self::push_capture_char(content, c);
2620 self.advance();
2621 while let Some(quoted) = self.peek_char() {
2622 Self::push_capture_char(content, quoted);
2623 self.advance();
2624 if escaped {
2625 escaped = false;
2626 continue;
2627 }
2628 match quoted {
2629 '\\' => escaped = true,
2630 '"' => break,
2631 _ => {}
2632 }
2633 }
2634 }
2635 '`' => {
2636 let mut escaped = false;
2637 Self::push_capture_char(content, c);
2638 self.advance();
2639 while let Some(quoted) = self.peek_char() {
2640 Self::push_capture_char(content, quoted);
2641 self.advance();
2642 if escaped {
2643 escaped = false;
2644 continue;
2645 }
2646 match quoted {
2647 '\\' => escaped = true,
2648 '`' => break,
2649 _ => {}
2650 }
2651 }
2652 }
2653 '[' => {
2654 Self::push_capture_char(content, c);
2655 self.advance();
2656 bracket_depth += 1;
2657 }
2658 ']' => {
2659 Self::push_capture_char(content, c);
2660 self.advance();
2661 bracket_depth -= 1;
2662 if bracket_depth == 0 {
2663 return true;
2664 }
2665 }
2666 '$' => {
2667 Self::push_capture_char(content, c);
2668 self.advance();
2669 if self.peek_char() == Some('(') {
2670 if self.second_char() == Some('(') {
2671 if !self.read_arithmetic_expansion_into(content) {
2672 return false;
2673 }
2674 } else {
2675 Self::push_capture_char(content, '(');
2676 self.advance();
2677 if !self.read_command_subst_into(content) {
2678 return false;
2679 }
2680 }
2681 } else if self.peek_char() == Some('{') {
2682 Self::push_capture_char(content, '{');
2683 self.advance();
2684 if !self.read_param_expansion_into(content, segment_start) {
2685 return false;
2686 }
2687 } else if self.peek_char() == Some('[') {
2688 Self::push_capture_char(content, '[');
2689 self.advance();
2690 if !self.read_legacy_arithmetic_into(content, segment_start) {
2691 return false;
2692 }
2693 }
2694 }
2695 _ => {
2696 Self::push_capture_char(content, c);
2697 self.advance();
2698 }
2699 }
2700 }
2701
2702 false
2703 }
2704
2705 fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
2709 self.read_command_subst_into_depth(content, 0)
2710 }
2711
2712 fn flush_command_subst_keyword(
2713 current_word: &mut String,
2714 pending_case_headers: &mut usize,
2715 case_clause_depths: &mut SmallVec<[usize; 4]>,
2716 depth: usize,
2717 word_started_at_command_start: &mut bool,
2718 ) {
2719 if current_word.is_empty() {
2720 *word_started_at_command_start = false;
2721 return;
2722 }
2723
2724 match current_word.as_str() {
2725 "case" if *word_started_at_command_start => *pending_case_headers += 1,
2726 "in" if *pending_case_headers > 0 => {
2727 *pending_case_headers -= 1;
2728 case_clause_depths.push(depth);
2729 }
2730 "esac" if *word_started_at_command_start => {
2731 case_clause_depths.pop();
2732 }
2733 _ => {}
2734 }
2735
2736 current_word.clear();
2737 *word_started_at_command_start = false;
2738 }
2739
2740 fn read_command_subst_heredoc_delimiter_into(
2741 &mut self,
2742 content: &mut Option<String>,
2743 ) -> Option<String> {
2744 while let Some(ch) = self.peek_char() {
2745 if !matches!(ch, ' ' | '\t') {
2746 break;
2747 }
2748 Self::push_capture_char(content, ch);
2749 self.advance();
2750 }
2751
2752 let mut cooked = String::new();
2753 let mut in_single = false;
2754 let mut in_double = false;
2755 let mut escaped = false;
2756 let mut saw_any = false;
2757
2758 while let Some(ch) = self.peek_char() {
2759 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2760 break;
2761 }
2762
2763 saw_any = true;
2764 Self::push_capture_char(content, ch);
2765 self.advance();
2766
2767 if escaped {
2768 cooked.push(ch);
2769 escaped = false;
2770 continue;
2771 }
2772
2773 match ch {
2774 '\\' if !in_single => escaped = true,
2775 '\'' if !in_double => in_single = !in_single,
2776 '"' if !in_single => in_double = !in_double,
2777 _ => cooked.push(ch),
2778 }
2779 }
2780
2781 saw_any.then_some(cooked)
2782 }
2783
2784 fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2785 Self::push_capture_char(content, '`');
2786 self.advance();
2787 while let Some(ch) = self.peek_char() {
2788 Self::push_capture_char(content, ch);
2789 self.advance();
2790 if ch == '\\' {
2791 if let Some(esc) = self.peek_char() {
2792 Self::push_capture_char(content, esc);
2793 self.advance();
2794 }
2795 continue;
2796 }
2797 if ch == '`' {
2798 break;
2799 }
2800 }
2801 }
2802
2803 fn read_command_subst_pending_heredoc_into(
2804 &mut self,
2805 content: &mut Option<String>,
2806 delimiter: &str,
2807 strip_tabs: bool,
2808 ) -> bool {
2809 loop {
2810 let mut line = String::new();
2811 let mut saw_newline = false;
2812
2813 while let Some(ch) = self.peek_char() {
2814 self.advance();
2815 if ch == '\n' {
2816 saw_newline = true;
2817 break;
2818 }
2819 line.push(ch);
2820 }
2821
2822 Self::push_capture_str(content, &line);
2823 if saw_newline {
2824 Self::push_capture_char(content, '\n');
2825 }
2826
2827 if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2828 return true;
2829 }
2830 }
2831 }
2832
    /// Reads the body of a command substitution through its closing `)`, mirroring the
    /// consumed text into `content`.
    ///
    /// Parenthesis nesting starts at one, i.e. the opening parenthesis is expected to have
    /// been consumed by the caller. `subst_depth` is the recursion depth of this call: it
    /// is incremented for each `$(` found inside a double-quoted string (see the `'"'` arm)
    /// and compared against `self.max_subst_depth`.
    ///
    /// State kept while scanning:
    /// * `depth`: parenthesis nesting inside this substitution.
    /// * `pending_heredocs`: `(delimiter, strip_tabs)` pairs collected from `<<` / `<<-`
    ///   operators; their bodies are consumed right after the next newline.
    /// * `pending_case_headers` / `case_clause_depths`: `case … in` tracking, so that a
    ///   `)` at the depth of an open `case` is captured without changing `depth`.
    /// * `current_word`: the run of ASCII alphanumerics/underscores being accumulated
    ///   for keyword detection.
    /// * `at_command_start` / `expecting_redirection_target`: whether the next word
    ///   may be a command word, or follows a redirection operator.
    ///
    /// Returns `true` when the closing `)` was consumed. Returns `false` when the input
    /// ends first or a nested reader returns `false`.
    fn read_command_subst_into_depth(
        &mut self,
        content: &mut Option<String>,
        subst_depth: usize,
    ) -> bool {
        // Recursion limit reached: skip to the balancing `)` by counting raw
        // parentheses only. The skipped characters are NOT mirrored into `content`;
        // only the final `)` is.
        if subst_depth >= self.max_subst_depth {
            let mut depth = 1;
            while let Some(c) = self.peek_char() {
                self.advance();
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            Self::push_capture_char(content, ')');
                            return true;
                        }
                    }
                    _ => {}
                }
            }
            return false;
        }

        let mut depth = 1;
        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
        let mut pending_case_headers = 0usize;
        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
        let mut current_word = String::with_capacity(16);
        let mut at_command_start = true;
        let mut expecting_redirection_target = false;
        let mut current_word_started_at_command_start = false;
        while let Some(c) = self.peek_char() {
            match c {
                // Comment (unless the lexer says this `#` belongs to a word): copy
                // through the end of the line.
                '#' if !self.should_treat_hash_as_word_char() => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '#');
                    self.advance();
                    while let Some(comment_ch) = self.peek_char() {
                        Self::push_capture_char(content, comment_ch);
                        self.advance();
                        if comment_ch == '\n' {
                            // The newline ending the comment also starts any
                            // here-document bodies queued on this line.
                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                                if !self.read_command_subst_pending_heredoc_into(
                                    content, &delimiter, strip_tabs,
                                ) {
                                    return false;
                                }
                            }
                            at_command_start = true;
                            expecting_redirection_target = false;
                            break;
                        }
                    }
                }
                // Nested parenthesis: one level deeper, and a new command may start.
                '(' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    depth += 1;
                    Self::push_capture_char(content, c);
                    self.advance();
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                ')' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    // Inside a `case … in` opened at this depth, `)` is captured
                    // without closing a parenthesis level.
                    if case_clause_depths
                        .last()
                        .is_some_and(|case_depth| *case_depth == depth)
                    {
                        Self::push_capture_char(content, ')');
                        self.advance();
                        at_command_start = true;
                        expecting_redirection_target = false;
                        continue;
                    }
                    depth -= 1;
                    self.advance();
                    if depth == 0 {
                        // This is the `)` that closes the substitution itself.
                        Self::push_capture_char(content, ')');
                        return true;
                    }
                    Self::push_capture_char(content, c);
                    at_command_start = false;
                    expecting_redirection_target = false;
                }
                // Double-quoted string: copied through the closing quote, with
                // backslash escapes and nested `$(…)` / `$((…))` handled inside.
                '"' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '"');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        match qc {
                            '"' => {
                                Self::push_capture_char(content, '"');
                                self.advance();
                                break;
                            }
                            '\\' => {
                                Self::push_capture_char(content, '\\');
                                self.advance();
                                if let Some(esc) = self.peek_char() {
                                    Self::push_capture_char(content, esc);
                                    self.advance();
                                }
                            }
                            '$' => {
                                Self::push_capture_char(content, '$');
                                self.advance();
                                if self.peek_char() == Some('(') {
                                    if self.second_char() == Some('(') {
                                        if !self.read_arithmetic_expansion_into(content) {
                                            return false;
                                        }
                                    } else {
                                        Self::push_capture_char(content, '(');
                                        self.advance();
                                        // The only place the recursion depth grows.
                                        if !self
                                            .read_command_subst_into_depth(content, subst_depth + 1)
                                        {
                                            return false;
                                        }
                                    }
                                }
                            }
                            _ => {
                                Self::push_capture_char(content, qc);
                                self.advance();
                            }
                        }
                    }
                    // A quoted word either satisfies a pending redirection target or
                    // counts as ordinary command text.
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Single-quoted string: copied verbatim through the closing quote.
                '\'' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backtick segment: delegated to the dedicated reader.
                '`' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    self.read_command_subst_backtick_segment_into(content);
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `$'…'` string: like single quotes, but a backslash carries the next
                // character with it so an escaped quote does not end the string.
                '$' if self.second_char() == Some('\'') => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '$');
                    self.advance();
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\\' {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            continue;
                        }
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backslash escape outside quotes: the next character is copied as-is.
                '\\' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\\');
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        Self::push_capture_char(content, esc);
                        self.advance();
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `<<`, `<<-` or `<<<`.
                '<' if self.second_char() == Some('<') => {
                    // An all-digit word that began a command is treated as the
                    // redirection's file descriptor, so the command has not started yet.
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }

                    Self::push_capture_char(content, '<');
                    self.advance();
                    Self::push_capture_char(content, '<');
                    self.advance();

                    // A third `<` makes this `<<<`: no here-document body follows,
                    // just a target word.
                    if self.peek_char() == Some('<') {
                        Self::push_capture_char(content, '<');
                        self.advance();
                        expecting_redirection_target = true;
                        continue;
                    }

                    // `<<-` is recorded as `strip_tabs` and forwarded to the
                    // delimiter-line comparison.
                    let strip_tabs = if self.peek_char() == Some('-') {
                        Self::push_capture_char(content, '-');
                        self.advance();
                        true
                    } else {
                        false
                    };

                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
                    {
                        // Body is consumed once the current line ends.
                        pending_heredocs.push((delimiter, strip_tabs));
                        expecting_redirection_target = false;
                    } else {
                        expecting_redirection_target = true;
                    }
                }
                // Any other redirection operator character.
                '>' | '<' => {
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                    expecting_redirection_target = true;
                }
                // End of line: consume queued here-document bodies, then a new
                // command may start.
                '\n' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    Self::push_capture_char(content, '\n');
                    self.advance();
                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                        if !self.read_command_subst_pending_heredoc_into(
                            content, &delimiter, strip_tabs,
                        ) {
                            return false;
                        }
                    }
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                _ => {
                    if c.is_ascii_alphanumeric() || c == '_' {
                        // Word character: accumulate it for keyword detection and
                        // remember whether the word began in command position.
                        if current_word.is_empty()
                            && !expecting_redirection_target
                            && at_command_start
                        {
                            current_word_started_at_command_start = true;
                            at_command_start = false;
                        }
                        current_word.push(c);
                    } else {
                        let had_word = !current_word.is_empty();
                        Self::flush_command_subst_keyword(
                            &mut current_word,
                            &mut pending_case_headers,
                            &mut case_clause_depths,
                            depth,
                            &mut current_word_started_at_command_start,
                        );
                        if had_word && expecting_redirection_target {
                            expecting_redirection_target = false;
                        }
                        match c {
                            // Blanks separate words without changing command position.
                            ' ' | '\t' => {}
                            // Command separators: the next word starts a new command.
                            ';' | '|' | '&' => {
                                at_command_start = true;
                                expecting_redirection_target = false;
                            }
                            _ => {
                                if !expecting_redirection_target {
                                    at_command_start = false;
                                }
                            }
                        }
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        false
    }
3233
    /// Reads the body of a `${…}` parameter expansion through its closing `}`, mirroring
    /// the text into `content`.
    ///
    /// Brace depth starts at one, i.e. the opening `{` is expected to have been consumed
    /// by the caller. `segment_start` is passed to `ensure_capture_from_source` whenever
    /// an escape sequence is rewritten, and forwarded to nested `${…}` readers.
    ///
    /// The return value is the `borrowable` flag, NOT a "closing brace found" flag: it
    /// becomes `false` when an escape was rewritten (so the captured text no longer
    /// matches the source verbatim), when a nested `$((…))` was not closed, or when a
    /// nested `${…}` returned `false`. Reaching the end of input without a closing `}`
    /// does not by itself change it.
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut borrowable = true;
        let mut depth = 1;
        // Count of unquoted `{` seen inside the expansion that still await a `}`.
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Value of `depth` when the current double-quoted span was opened (0 outside).
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            if in_single {
                match c {
                    '\\' => {
                        let escape_start = self.current_position();
                        if self.second_char() == Some('"') {
                            // `\"` inside single quotes is rewritten to a bare `"`:
                            // the backslash is consumed but not captured.
                            // NOTE(review): `ensure_capture_from_source` presumably
                            // fills `content` with the source text from `segment_start`
                            // up to `escape_start` before the rewrite; confirm.
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                // A `}` counts unless it sits inside a double-quoted span opened at
                // the current depth or deeper.
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    // With a literal `{` still open and another closer available
                    // later, this `}` pairs with the literal brace instead of ending
                    // the expansion.
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                '{' if !in_single && !in_double => {
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    // Inside double quotes a single quote is captured but opens nothing.
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                // `\$` is captured as NUL followed by `$`.
                                // NOTE(review): the NUL presumably marks the dollar
                                // as escaped for a later stage; confirm with the
                                // consumer of this text.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // The backslash is dropped; only the escaped
                                // character is captured.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // Kept verbatim; it also cancels one pending literal
                                // `{`, if any.
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at end of input.
                        Self::push_capture_char(content, '\\');
                    }
                }
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            // The result of the nested command substitution is
                            // ignored here.
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3384
3385 fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
3386 let mut chars = self.lookahead_chars().peekable();
3387 let mut depth = target_depth;
3388 let mut in_single = false;
3389 let mut in_double = false;
3390 let mut double_quote_depth = 0usize;
3391
3392 while let Some(ch) = chars.next() {
3393 if in_single {
3394 match ch {
3395 '\'' => in_single = false,
3396 '\\' if chars.peek() == Some(&'"') => {
3397 chars.next();
3398 }
3399 '\\' => {}
3400 _ => {}
3401 }
3402 continue;
3403 }
3404
3405 if in_double {
3406 match ch {
3407 '"' => {
3408 in_double = false;
3409 double_quote_depth = 0;
3410 }
3411 '\\' => {
3412 chars.next();
3413 }
3414 '$' if chars.peek() == Some(&'{') => {
3415 chars.next();
3416 depth += 1;
3417 }
3418 '}' if depth > double_quote_depth => {
3419 depth -= 1;
3420 }
3421 _ => {}
3422 }
3423 continue;
3424 }
3425
3426 match ch {
3427 '\n' if depth == target_depth => return false,
3428 '\'' => in_single = true,
3429 '"' => {
3430 in_double = true;
3431 double_quote_depth = depth;
3432 }
3433 '\\' => {
3434 chars.next();
3435 }
3436 '$' if chars.peek() == Some(&'{') => {
3437 chars.next();
3438 depth += 1;
3439 }
3440 '}' => {
3441 if depth == target_depth {
3442 return true;
3443 }
3444 depth -= 1;
3445 }
3446 _ => {}
3447 }
3448 }
3449
3450 false
3451 }
3452
3453 fn looks_like_brace_expansion(&self) -> bool {
3459 const MAX_LOOKAHEAD: usize = 10_000;
3460
3461 let mut chars = self.lookahead_chars();
3462
3463 if chars.next() != Some('{') {
3465 return false;
3466 }
3467
3468 let mut depth = 1;
3469 let mut paren_depth = 0usize;
3470 let mut has_comma = false;
3471 let mut has_dot_dot = false;
3472 let mut escaped = false;
3473 let mut in_single = false;
3474 let mut in_double = false;
3475 let mut in_backtick = false;
3476 let mut prev_char = None;
3477 let mut scanned = 0usize;
3478
3479 for ch in chars {
3480 scanned += 1;
3481 if scanned > MAX_LOOKAHEAD {
3482 return false;
3483 }
3484
3485 let brace_surface_active = !in_single && !in_double && !in_backtick;
3486 let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;
3487
3488 match ch {
3489 _ if escaped => {
3490 escaped = false;
3491 }
3492 '\\' if !in_single => escaped = true,
3493 '\'' if !in_double && !in_backtick => in_single = !in_single,
3494 '"' if !in_single && !in_backtick => in_double = !in_double,
3495 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3496 '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
3497 paren_depth += 1
3498 }
3499 ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
3500 '{' if !in_single && !in_double && !in_backtick => depth += 1,
3501 '}' if !in_single && !in_double && !in_backtick => {
3502 depth -= 1;
3503 if depth == 0 {
3504 return has_comma || has_dot_dot;
3506 }
3507 }
3508 ',' if at_top_level => has_comma = true,
3509 '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
3510 ' ' | '\t' | '\n' | ';' if at_top_level => return false,
3512 _ => {}
3513 }
3514 prev_char = Some(ch);
3515 }
3516
3517 false
3518 }
3519
3520 fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
3521 let mut brace_depth = 1usize;
3522 let mut paren_depth = 0usize;
3523 let mut escaped = false;
3524 let mut in_single = false;
3525 let mut in_double = false;
3526 let mut in_backtick = false;
3527 let mut prev_char = None;
3528
3529 while let Some(ch) = self.peek_char() {
3530 Self::push_capture_char(word, ch);
3531 self.advance();
3532
3533 if escaped {
3534 escaped = false;
3535 prev_char = Some(ch);
3536 continue;
3537 }
3538
3539 match ch {
3540 '\\' if !in_single => escaped = true,
3541 '\'' if !in_double && !in_backtick => in_single = !in_single,
3542 '"' if !in_single && !in_backtick => in_double = !in_double,
3543 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3544 '(' if !in_single
3545 && !in_double
3546 && !in_backtick
3547 && (paren_depth > 0 || prev_char == Some('$')) =>
3548 {
3549 paren_depth += 1
3550 }
3551 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3552 paren_depth -= 1
3553 }
3554 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3555 '}' if !in_single && !in_double && !in_backtick => {
3556 brace_depth -= 1;
3557 if brace_depth == 0 {
3558 break;
3559 }
3560 }
3561 _ => {}
3562 }
3563
3564 prev_char = Some(ch);
3565 }
3566 }
3567
3568 fn consume_brace_word_body(&mut self, word: &mut String) {
3569 let mut brace_depth = 1usize;
3570 let mut paren_depth = 0usize;
3571 let mut escaped = false;
3572 let mut in_single = false;
3573 let mut in_double = false;
3574 let mut in_backtick = false;
3575 let mut prev_char = None;
3576
3577 while let Some(ch) = self.peek_char() {
3578 word.push(ch);
3579 self.advance();
3580
3581 if escaped {
3582 escaped = false;
3583 prev_char = Some(ch);
3584 continue;
3585 }
3586
3587 match ch {
3588 '\\' if !in_single => escaped = true,
3589 '\'' if !in_double && !in_backtick => in_single = !in_single,
3590 '"' if !in_single && !in_backtick => in_double = !in_double,
3591 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3592 '(' if !in_single
3593 && !in_double
3594 && !in_backtick
3595 && (paren_depth > 0 || prev_char == Some('$')) =>
3596 {
3597 paren_depth += 1
3598 }
3599 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3600 paren_depth -= 1
3601 }
3602 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3603 '}' if !in_single && !in_double && !in_backtick => {
3604 brace_depth -= 1;
3605 if brace_depth == 0 {
3606 break;
3607 }
3608 }
3609 _ => {}
3610 }
3611
3612 prev_char = Some(ch);
3613 }
3614 }
3615
3616 fn looks_like_mid_word_brace_segment(&self) -> bool {
3619 const MAX_LOOKAHEAD: usize = 10_000;
3620
3621 let mut chars = self.lookahead_chars();
3622 if chars.next() != Some('{') {
3623 return false;
3624 }
3625
3626 let mut brace_depth = 1;
3627 let mut paren_depth = 0usize;
3628 let mut escaped = false;
3629 let mut in_single = false;
3630 let mut in_double = false;
3631 let mut in_backtick = false;
3632 let mut prev_char = None;
3633 let mut scanned = 0usize;
3634
3635 for ch in chars {
3636 scanned += 1;
3637 if scanned > MAX_LOOKAHEAD {
3638 return false;
3639 }
3640
3641 if !in_single
3642 && !in_double
3643 && !in_backtick
3644 && !escaped
3645 && brace_depth == 1
3646 && paren_depth == 0
3647 && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3648 {
3649 return false;
3650 }
3651
3652 if escaped {
3653 escaped = false;
3654 prev_char = Some(ch);
3655 continue;
3656 }
3657
3658 match ch {
3659 '\\' => escaped = true,
3660 '\'' if !in_double && !in_backtick => in_single = !in_single,
3661 '"' if !in_single && !in_backtick => in_double = !in_double,
3662 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3663 '(' if !in_single
3664 && !in_double
3665 && !in_backtick
3666 && (paren_depth > 0 || prev_char == Some('$')) =>
3667 {
3668 paren_depth += 1
3669 }
3670 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3671 paren_depth -= 1
3672 }
3673 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3674 '}' if !in_single && !in_double && !in_backtick => {
3675 brace_depth -= 1;
3676 if brace_depth == 0 {
3677 return true;
3678 }
3679 }
3680 _ => {}
3681 }
3682
3683 prev_char = Some(ch);
3684 }
3685
3686 false
3687 }
3688
3689 fn is_brace_group_start(&self) -> bool {
3691 let mut chars = self.lookahead_chars();
3692 if chars.next() != Some('{') {
3694 return false;
3695 }
3696 matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3698 }
3699
3700 fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3703 const MAX_LOOKAHEAD: usize = 10_000;
3704
3705 let mut chars = self.lookahead_chars();
3706 let mut depth = 1;
3707 let mut has_comma = false;
3708 let mut has_dot_dot = false;
3709 let mut prev_char = None;
3710 let mut scanned = 0usize;
3711
3712 for ch in chars.by_ref() {
3713 scanned += 1;
3714 if scanned > MAX_LOOKAHEAD {
3715 return false;
3716 }
3717 match ch {
3718 '{' => depth += 1,
3719 '}' => {
3720 depth -= 1;
3721 if depth == 0 {
3722 return has_comma || has_dot_dot;
3723 }
3724 }
3725 ',' if depth == 1 => has_comma = true,
3726 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3727 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3728 _ => {}
3729 }
3730 prev_char = Some(ch);
3731 }
3732
3733 false
3734 }
3735
3736 fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3737 let mut chars = self.lookahead_chars();
3738 if chars.next() != Some('{') {
3739 return false;
3740 }
3741 chars.next() == Some(')')
3742 }
3743
3744 fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3746 let mut word = String::with_capacity(16);
3747
3748 if let Some('{') = self.peek_char() {
3749 word.push('{');
3750 self.advance();
3751 } else {
3752 return None;
3753 }
3754
3755 self.consume_brace_word_body(&mut word);
3756
3757 while let Some(ch) = self.peek_char() {
3758 if Self::is_word_char(ch) {
3759 if self.reinject_buf.is_empty() {
3760 let chunk = self.cursor.eat_while(Self::is_word_char);
3761 word.push_str(chunk);
3762 self.advance_scanned_source_bytes(chunk.len());
3763 } else {
3764 word.push(ch);
3765 self.advance();
3766 }
3767 } else {
3768 break;
3769 }
3770 }
3771
3772 Some(LexedToken::owned_word(TokenKind::Word, word))
3773 }
3774
3775 fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3777 let mut word = String::with_capacity(16);
3778
3779 if let Some('{') = self.peek_char() {
3781 word.push('{');
3782 self.advance();
3783 } else {
3784 return None;
3785 }
3786
3787 self.consume_brace_word_body(&mut word);
3789
3790 while let Some(ch) = self.peek_char() {
3792 if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3793 if ch == '{' {
3794 word.push(ch);
3796 self.advance();
3797 self.consume_brace_word_body(&mut word);
3798 } else {
3799 word.push(ch);
3800 self.advance();
3801 }
3802 } else {
3803 break;
3804 }
3805 }
3806
3807 Some(LexedToken::owned_word(TokenKind::Word, word))
3808 }
3809
3810 fn looks_like_assoc_assign(&self) -> bool {
3814 let mut chars = self.lookahead_chars();
3815 if chars.next() != Some('(') {
3817 return false;
3818 }
3819 for ch in chars {
3821 match ch {
3822 ' ' | '\t' => continue,
3823 '[' => return true,
3824 _ => return false,
3825 }
3826 }
3827 false
3828 }
3829
3830 fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3831 text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3832 }
3833
3834 fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3835 word.segments().any(|segment| {
3836 matches!(
3837 segment.kind(),
3838 LexedWordSegmentKind::SingleQuoted
3839 | LexedWordSegmentKind::DollarSingleQuoted
3840 | LexedWordSegmentKind::DoubleQuoted
3841 | LexedWordSegmentKind::DollarDoubleQuoted
3842 )
3843 }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3844 }
3845
3846 fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3847 text.contains(['*', '?'])
3848 || text.ends_with('}') && text.contains("${")
3849 || text.ends_with(']')
3850 && text
3851 .rfind('[')
3852 .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3853 }
3854
3855 fn is_word_char(ch: char) -> bool {
3856 !matches!(
3857 ch,
3858 ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3859 )
3860 }
3861
3862 const fn is_ascii_word_byte(byte: u8) -> bool {
3863 !matches!(
3864 byte,
3865 b' ' | b'\t'
3866 | b'\n'
3867 | b';'
3868 | b'|'
3869 | b'&'
3870 | b'>'
3871 | b'<'
3872 | b'('
3873 | b')'
3874 | b'{'
3875 | b'}'
3876 | b'\''
3877 | b'"'
3878 )
3879 }
3880
3881 const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3882 Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3883 }
3884
3885 fn is_plain_word_char(ch: char) -> bool {
3886 Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3887 }
3888
3889 pub(super) fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3891 let mut content = String::with_capacity(64);
3892 let mut current_line = String::with_capacity(64);
3893
3894 let mut rest_of_line = String::with_capacity(32);
3901 let rest_of_line_start = self.current_position();
3902 let mut in_double_quote = false;
3903 let mut in_single_quote = false;
3904 let mut in_comment = false;
3905 let mut saw_non_whitespace_tail = false;
3906 let mut consecutive_backslashes = 0usize;
3907 let mut previous_tail_char = None;
3908 while let Some(ch) = self.peek_char() {
3909 self.advance();
3910 if in_comment {
3911 if ch == '\n' {
3912 break;
3913 }
3914 rest_of_line.push(ch);
3915 previous_tail_char = Some(ch);
3916 continue;
3917 }
3918 if ch == '#'
3919 && !in_single_quote
3920 && !in_double_quote
3921 && self.comments_enabled()
3922 && heredoc_tail_hash_starts_comment(previous_tail_char)
3923 {
3924 in_comment = true;
3925 rest_of_line.push(ch);
3926 previous_tail_char = Some(ch);
3927 consecutive_backslashes = 0;
3928 continue;
3929 }
3930 let backslash_continues_line = ch == '\\'
3931 && !in_single_quote
3932 && self.peek_char() == Some('\n')
3933 && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3934 && consecutive_backslashes.is_multiple_of(2);
3935 if backslash_continues_line {
3936 rest_of_line.push(ch);
3937 rest_of_line.push('\n');
3938 self.advance();
3939 consecutive_backslashes = 0;
3940 continue;
3941 }
3942 if ch == '\n' && !in_double_quote && !in_single_quote {
3943 break;
3944 }
3945 if ch == '"' && !in_single_quote {
3946 in_double_quote = !in_double_quote;
3947 } else if ch == '\'' && !in_double_quote {
3948 in_single_quote = !in_single_quote;
3949 } else if ch == '\\' && in_double_quote {
3950 rest_of_line.push(ch);
3952 if let Some(next) = self.peek_char() {
3953 rest_of_line.push(next);
3954 self.advance();
3955 }
3956 continue;
3957 }
3958 rest_of_line.push(ch);
3959 if !ch.is_whitespace() {
3960 saw_non_whitespace_tail = true;
3961 }
3962 if ch == '\\' && !in_single_quote {
3963 consecutive_backslashes += 1;
3964 } else {
3965 consecutive_backslashes = 0;
3966 }
3967 previous_tail_char = Some(ch);
3968 }
3969
3970 self.sync_offset_to_cursor();
3974 let content_start = self.current_position();
3975 let mut current_line_start = content_start;
3976 let content_end;
3977
3978 loop {
3980 if self.reinject_buf.is_empty() {
3981 self.sync_offset_to_cursor();
3987 let rest = self.cursor.rest();
3988 if rest.is_empty() {
3989 content_end = self.current_position();
3990 break;
3991 }
3992
3993 let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3994 let line = &rest[..line_len];
3995 let has_newline = line_len < rest.len();
3996
3997 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3998 content_end = current_line_start;
3999 self.consume_source_bytes(line_len);
4000 if has_newline {
4001 self.consume_ascii_chars(1);
4002 }
4003 break;
4004 }
4005
4006 content.push_str(line);
4007 self.consume_source_bytes(line_len);
4008
4009 if has_newline {
4010 self.consume_ascii_chars(1);
4011 content.push('\n');
4012 current_line_start = self.current_position();
4013 continue;
4014 }
4015
4016 content_end = self.current_position();
4017 break;
4018 }
4019
4020 match self.peek_char() {
4021 Some('\n') => {
4022 self.advance();
4023 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4025 content_end = current_line_start;
4026 break;
4027 }
4028 content.push_str(¤t_line);
4029 content.push('\n');
4030 current_line.clear();
4031 current_line_start = self.current_position();
4032 }
4033 Some(ch) => {
4034 current_line.push(ch);
4035 self.advance();
4036 }
4037 None => {
4038 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4040 content_end = current_line_start;
4041 break;
4042 }
4043 if !current_line.is_empty() {
4044 content.push_str(¤t_line);
4045 }
4046 content_end = self.current_position();
4047 break;
4048 }
4049 }
4050 }
4051
4052 let post_heredoc_offset = self.offset;
4057 self.offset = rest_of_line_start.offset;
4058 for ch in rest_of_line.chars() {
4059 self.reinject_buf.push_back(ch);
4060 }
4061 self.reinject_buf.push_back('\n');
4062 self.reinject_resume_offset = Some(post_heredoc_offset);
4063
4064 HeredocRead {
4065 content,
4066 content_span: Span::from_positions(content_start, content_end),
4067 }
4068 }
4069
4070 fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4071 let mut chars = self.cursor.rest().chars();
4072 if chars.next() != Some('\n') {
4073 return false;
4074 }
4075
4076 for ch in chars {
4077 if matches!(ch, ' ' | '\t') {
4078 continue;
4079 }
4080 if ch == '\n' {
4081 return false;
4082 }
4083 return matches!(ch, '|' | '&' | ';' | '<' | '>')
4084 || (ch == '#' && self.comments_enabled());
4085 }
4086
4087 false
4088 }
4089}
4090
/// Reports whether `line` terminates a here-document with the given `delimiter`.
///
/// When `strip_tabs` is set, leading tab characters are ignored first. The line
/// matches when it starts with the delimiter and everything after it (possibly
/// nothing) consists only of spaces and tabs.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    // An exact match leaves an empty remainder, which trivially satisfies `all`.
    candidate
        .strip_prefix(delimiter)
        .is_some_and(|trailing| trailing.chars().all(|ch| matches!(ch, ' ' | '\t')))
}
4108
/// Decides whether a `#` begins a comment, given the character that precedes it.
///
/// The answer is `true` when there is no preceding character, when it is
/// whitespace, or when it is one of `; | & < > )`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(';' | '|' | '&' | '<' | '>' | ')') => true,
        Some(prev) => prev.is_whitespace(),
    }
}
4114
/// Decodes the character that starts at byte `index` of `input`.
///
/// Returns the character together with the byte index just past it, or `None`
/// when `index` is at or beyond the end of `input` or does not fall on a
/// character boundary.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|tail| tail.chars().next())
        .map(|ch| (ch, index + ch.len_utf8()))
}
4119
4120fn line_has_unclosed_double_paren(prefix: &str) -> bool {
4121 let mut index = 0usize;
4122 let mut depth = 0usize;
4123 let mut in_single = false;
4124 let mut in_double = false;
4125 let mut in_backtick = false;
4126 let mut escaped = false;
4127
4128 while let Some((ch, next_index)) = next_char_boundary(prefix, index) {
4129 let was_escaped = escaped;
4130 if ch == '\\' && !in_single {
4131 escaped = !escaped;
4132 index = next_index;
4133 continue;
4134 }
4135 escaped = false;
4136
4137 match ch {
4138 '\'' if !in_double && !in_backtick && !was_escaped => in_single = !in_single,
4139 '"' if !in_single && !in_backtick && !was_escaped => in_double = !in_double,
4140 '`' if !in_single && !in_double && !was_escaped => in_backtick = !in_backtick,
4141 '(' if !in_single
4142 && !in_double
4143 && !in_backtick
4144 && !was_escaped
4145 && prefix[next_index..].starts_with('(') =>
4146 {
4147 depth += 1;
4148 index = next_index + '('.len_utf8();
4149 continue;
4150 }
4151 ')' if !in_single
4152 && !in_double
4153 && !in_backtick
4154 && !was_escaped
4155 && prefix[next_index..].starts_with(')') =>
4156 {
4157 depth = depth.saturating_sub(1);
4158 index = next_index + ')'.len_utf8();
4159 continue;
4160 }
4161 _ => {}
4162 }
4163
4164 index = next_index;
4165 }
4166
4167 depth > 0
4168}
4169
4170fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4171 let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4172 let prefix = &input[line_start..index];
4173 line_has_unclosed_double_paren(prefix)
4174}
4175
4176fn hash_starts_comment(input: &str, index: usize) -> bool {
4177 if inside_unclosed_double_paren_on_line(input, index) {
4178 return false;
4179 }
4180
4181 let next = &input[index + '#'.len_utf8()..];
4182 input[..index]
4183 .chars()
4184 .next_back()
4185 .is_none_or(|prev| match prev {
4186 '(' => {
4187 let whitespace_index = next.find(char::is_whitespace);
4188 let close_index = next.find(')');
4189
4190 match (whitespace_index, close_index) {
4191 (Some(whitespace), Some(close)) => whitespace < close,
4192 (Some(_), None) | (None, None) => true,
4193 (None, Some(_)) => false,
4194 }
4195 }
4196 _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4197 })
4198}
4199
/// Reports whether `ch` ends a here-document delimiter word.
///
/// Inside quotes or directly after a backslash nothing terminates the word;
/// otherwise whitespace and the characters `| & ; < > ( )` do.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }
    matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')') || ch.is_whitespace()
}
4211
4212fn scan_double_quoted_command_substitution_segment(
4213 input: &str,
4214 mut index: usize,
4215 subst_depth: usize,
4216) -> Option<usize> {
4217 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4218 match ch {
4219 '"' => return Some(next_index),
4220 '\\' => {
4221 index = next_index;
4222 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4223 index = escaped_next;
4224 }
4225 }
4226 '$' if input[next_index..].starts_with('{') => {
4227 let consumed = scan_command_subst_parameter_expansion_len(
4228 &input[next_index + '{'.len_utf8()..],
4229 subst_depth,
4230 0,
4231 )?;
4232 index = next_index + '{'.len_utf8() + consumed;
4233 }
4234 '$' if input[next_index..].starts_with('(')
4235 && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4236 {
4237 let consumed = scan_command_substitution_body_len_inner(
4238 &input[next_index + '('.len_utf8()..],
4239 subst_depth + 1,
4240 )?;
4241 index = next_index + '('.len_utf8() + consumed;
4242 }
4243 _ => index = next_index,
4244 }
4245 }
4246
4247 None
4248}
4249
/// Measures a `${…}` parameter expansion whose body starts at the beginning of
/// `input` (the callers in this file pass the text just after `${`).
///
/// Returns the number of bytes up to and including the closing `}` that is not
/// quoted or escaped, or `None` when the input ends first.
///
/// * `subst_depth` is forwarded (incremented) to nested command-substitution
///   scans.
/// * `parameter_depth` counts how many `${` levels have been entered by
///   recursion; once it reaches `MAX_PARAMETER_EXPANSION_SCAN_DEPTH` the scan is
///   handed to the non-recursive, brace-counting variant.
fn scan_command_subst_parameter_expansion_len(
    input: &str,
    subst_depth: usize,
    parameter_depth: usize,
) -> Option<usize> {
    if parameter_depth >= MAX_PARAMETER_EXPANSION_SCAN_DEPTH {
        return scan_command_subst_parameter_expansion_len_balanced(input, subst_depth);
    }

    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // Set right after an unquoted `$`, so that a following `'` opens `$'…'`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Backslashes toggle the escape flag everywhere except in plain single quotes.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // Nested `${…}`: recurse one level deeper. If the nested scan fails,
            // fall through and treat the `$` as an ordinary character.
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                    parameter_depth + 1,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // Nested `$(…)`; a `$((` opener is deliberately not handled here.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(…)` / `>(…)` outside every kind of quote is skipped as a unit.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // The first `}` outside quotes and escapes closes this expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4358
/// Non-recursive variant of the `${…}` length scan.
///
/// Nested `${` openers are tracked with a brace counter (starting at 1 for the
/// expansion whose body begins at the start of `input`) rather than by
/// recursion. Returns the number of bytes up to and including the `}` that
/// brings the counter back to zero, or `None` when the input ends first.
///
/// `subst_depth` is forwarded (incremented) to nested command-substitution
/// scans.
fn scan_command_subst_parameter_expansion_len_balanced(
    input: &str,
    subst_depth: usize,
) -> Option<usize> {
    let mut index = 0usize;
    let mut brace_depth = 1usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // Set right after an unquoted `$`, so that a following `'` opens `$'…'`.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Backslashes toggle the escape flag everywhere except in plain single quotes.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // Nested `${`: just count it; the matching `}` is handled below.
            if input[next_index..].starts_with('{') {
                brace_depth = brace_depth.saturating_add(1);
                index = next_index + '{'.len_utf8();
                ansi_c_quote_pending = false;
                continue;
            }

            // Nested `$(…)`; a `$((` opener is deliberately not handled here.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(…)` / `>(…)` outside every kind of quote is skipped as a unit.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // An unquoted `}` closes one level; the outermost one ends the scan.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                brace_depth = brace_depth.saturating_sub(1);
                if brace_depth == 0 {
                    return Some(next_index);
                }
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4461
4462fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4463 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4464 if !matches!(ch, ' ' | '\t') {
4465 break;
4466 }
4467 index = next_index;
4468 }
4469
4470 let start = index;
4471 let mut cooked = String::new();
4472 let mut in_single = false;
4473 let mut in_double = false;
4474 let mut escaped = false;
4475
4476 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4477 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4478 break;
4479 }
4480
4481 index = next_index;
4482 if escaped {
4483 cooked.push(ch);
4484 escaped = false;
4485 continue;
4486 }
4487
4488 match ch {
4489 '\\' if !in_single => escaped = true,
4490 '\'' if !in_double => in_single = !in_single,
4491 '"' if !in_single => in_double = !in_double,
4492 _ => cooked.push(ch),
4493 }
4494 }
4495
4496 (index > start).then_some((index, cooked))
4497}
4498
4499fn skip_command_subst_pending_heredoc(
4500 input: &str,
4501 mut index: usize,
4502 delimiter: &str,
4503 strip_tabs: bool,
4504) -> usize {
4505 while index <= input.len() {
4506 let rest = &input[index..];
4507 let line_len = rest.find('\n').unwrap_or(rest.len());
4508 let line = &rest[..line_len];
4509 let has_newline = line_len < rest.len();
4510
4511 index += line_len;
4512 if has_newline {
4513 index += '\n'.len_utf8();
4514 }
4515
4516 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4517 return index;
4518 }
4519 }
4520
4521 index
4522}
4523
4524fn scan_command_subst_ansi_c_single_quoted_segment(
4525 input: &str,
4526 quote_index: usize,
4527) -> Option<usize> {
4528 let mut index = quote_index + '\''.len_utf8();
4529
4530 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4531 index = next_index;
4532 if ch == '\\' {
4533 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4534 index = escaped_next;
4535 }
4536 continue;
4537 }
4538
4539 if ch == '\'' {
4540 return Some(index);
4541 }
4542 }
4543
4544 None
4545}
4546
4547fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4548 let mut index = start;
4549
4550 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4551 index = next_index;
4552 if ch == '\\' {
4553 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4554 index = escaped_next;
4555 }
4556 continue;
4557 }
4558
4559 if ch == '`' {
4560 return Some(index);
4561 }
4562 }
4563
4564 None
4565}
4566
4567fn flush_scanned_command_subst_keyword(
4568 current_word: &mut String,
4569 pending_case_headers: &mut usize,
4570 case_clause_depths: &mut SmallVec<[usize; 4]>,
4571 depth: usize,
4572 word_started_at_command_start: &mut bool,
4573) {
4574 if current_word.is_empty() {
4575 *word_started_at_command_start = false;
4576 return;
4577 }
4578
4579 match current_word.as_str() {
4580 "case" if *word_started_at_command_start => *pending_case_headers += 1,
4581 "in" if *pending_case_headers > 0 => {
4582 *pending_case_headers -= 1;
4583 case_clause_depths.push(depth);
4584 }
4585 "esac" if *word_started_at_command_start => {
4586 case_clause_depths.pop();
4587 }
4588 _ => {}
4589 }
4590
4591 current_word.clear();
4592 *word_started_at_command_start = false;
4593}
4594
/// Measures the body of a command substitution that begins at the start of
/// `input` (the callers in this file pass the text just after the opening `(`).
///
/// Returns the number of bytes up to and including the `)` that brings the
/// parenthesis depth back to zero. Returns `None` when `subst_depth` has
/// reached `DEFAULT_MAX_SUBST_DEPTH`, when the input ends before the closing
/// parenthesis, or when a nested quoted/expansion scan fails.
///
/// While scanning, the function keeps just enough shell state to avoid being
/// fooled by parentheses that do not nest:
///
/// * comments, quotes, backticks, `$'…'`, `${…}` and nested `$(…)` are skipped
///   as units;
/// * here-documents announced with `<<` / `<<-` are skipped once the end of
///   their line is reached;
/// * `case … in` is tracked so that the `)` closing a case pattern at the
///   recorded depth does not count as a closing parenthesis.
pub(super) fn scan_command_substitution_body_len_inner(
    input: &str,
    subst_depth: usize,
) -> Option<usize> {
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    // Parenthesis nesting; 1 accounts for the substitution being scanned.
    let mut depth = 1;
    // Delimiters (and their strip-tabs flag) of heredocs whose bodies start
    // after the next newline.
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    let mut pending_case_headers = 0usize;
    // Parenthesis depth at which each open `case … in` was seen.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Comment: skip to the end of the line, then skip pending heredocs.
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // Inside a `case` body at this depth the `)` ends a pattern
                // rather than closing a parenthesis.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                if depth == 0 {
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                // Plain single quotes: no escapes, just run to the closing quote
                // (or to the end of the input when it is missing).
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // `$'…'` quoting.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Backslash: skip it together with the character it escapes.
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            '>' => {
                // An all-digit word at command start directly before the operator
                // is a file-descriptor number, so the command has not started yet.
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            // `<<` family: here-string (`<<<`) or here-document (`<<`, `<<-`).
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // Inside an unclosed `((` on this line the `<<` is skipped
                // without starting a here-document.
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<` takes an ordinary word as its target.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    // The body is skipped later, when the end of this line is reached.
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                // Bodies of heredocs announced on the finished line start here.
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                    0,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(…)`; `$((` is left to the generic `(` handling.
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            _ => {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    // Accumulate keyword candidates; only a word that begins at
                    // command start (and is not a redirection target) is marked.
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        ' ' | '\t' => {}
                        // Command separators start a new command.
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    None
}
4952
/// Entry point for measuring a command-substitution body: runs
/// `scan_command_substitution_body_len_inner` on `input` with a starting
/// substitution depth of 0 and returns its result unchanged.
pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
    scan_command_substitution_body_len_inner(input, 0)
}
4956
4957#[cfg(test)]
4958mod tests {
4959 use super::*;
4960
4961 fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4962 match token.kind {
4963 kind if kind.is_word_like() => token.word_string(),
4964 TokenKind::Comment => token
4965 .span
4966 .slice(source)
4967 .strip_prefix('#')
4968 .map(str::to_string),
4969 TokenKind::Error => token
4970 .error_kind()
4971 .map(LexerErrorKind::message)
4972 .map(str::to_string),
4973 _ => None,
4974 }
4975 }
4976
4977 fn assert_next_token(
4978 lexer: &mut Lexer<'_>,
4979 expected_kind: TokenKind,
4980 expected_text: Option<&str>,
4981 ) {
4982 let token = lexer.next_lexed_token().unwrap();
4983 assert_eq!(token.kind, expected_kind);
4984 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4985 }
4986
4987 fn assert_next_token_with_comments(
4988 lexer: &mut Lexer<'_>,
4989 expected_kind: TokenKind,
4990 expected_text: Option<&str>,
4991 ) {
4992 let token = lexer.next_lexed_token_with_comments().unwrap();
4993 assert_eq!(token.kind, expected_kind);
4994 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4995 }
4996
4997 fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4998 let mut lexer = Lexer::new(input);
4999
5000 while let Some(token) = lexer.next_lexed_token() {
5001 if token.kind == TokenKind::Newline {
5002 continue;
5003 }
5004
5005 assert_eq!(
5006 token.span.start.line, token.span.end.line,
5007 "token should stay on one line: {:?}",
5008 token
5009 );
5010 }
5011 }
5012
/// Three space-separated bare words lex as three `Word` tokens and nothing else.
#[test]
fn test_simple_words() {
    let mut lexer = Lexer::new("echo hello world");

    for word in ["echo", "hello", "world"] {
        assert_next_token(&mut lexer, TokenKind::Word, Some(word));
    }
    assert!(lexer.next_lexed_token().is_none());
}
5022
/// A single-quoted argument is reported as a `LiteralWord` with the quotes removed.
#[test]
fn test_single_quoted_string() {
    let mut lexer = Lexer::new("echo 'hello world'");

    let expected_tokens = [
        (TokenKind::Word, "echo"),
        (TokenKind::LiteralWord, "hello world"),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, Some(text));
    }
    assert!(lexer.next_lexed_token().is_none());
}
5032
/// A double-quoted argument is reported as a `QuotedWord` with the quotes removed.
#[test]
fn test_double_quoted_string() {
    let mut lexer = Lexer::new("echo \"hello world\"");

    let expected_tokens = [
        (TokenKind::Word, "echo"),
        (TokenKind::QuotedWord, "hello world"),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, Some(text));
    }
    assert!(lexer.next_lexed_token().is_none());
}
5041
/// A `}` inside double quotes must not terminate the surrounding brace
/// expansion: the whole `{...}` stays one `Word`.
#[test]
fn test_brace_expansion_token_ignores_quoted_closers() {
    let mut lexer = Lexer::new("echo {\"}\",a}\n");

    let expected_tokens = [
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some(r#"{"}",a}"#)),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
    assert!(lexer.next_lexed_token().is_none());
}
5051
/// A backslash right before the closing single quote of a brace member does
/// not extend the word: `next` is still lexed as its own token.
#[test]
fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
    let mut lexer = Lexer::new("echo {'a\\',b} next\n");

    let expected_tokens = [
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some(r#"{'a\',b}"#)),
        (TokenKind::Word, Some("next")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
    assert!(lexer.next_lexed_token().is_none());
}
5062
/// A double-quoted `$bar` is a single `DoubleQuoted` segment whose span
/// slices back to the text between the quotes.
#[test]
fn test_double_quoted_expansion_token_keeps_source_backing() {
    let source = r#""$bar""#;
    let mut lexer = Lexer::new(source);

    let quoted = lexer.next_lexed_token().unwrap();
    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some("$bar"));

    let only_segment = quoted.word().unwrap().single_segment().unwrap();
    assert_eq!(only_segment.kind(), LexedWordSegmentKind::DoubleQuoted);

    let backing = only_segment.span().unwrap().slice(source);
    assert_eq!(backing, "$bar");
}
5077
/// Quotes and a pipeline nested inside `$(...)` are kept verbatim in the
/// text of the enclosing double-quoted word.
#[test]
fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
    let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
    let expected_text = r#"$(echo "$line" | cut -d' ' -f2-)"#;
    let mut lexer = Lexer::new(source);

    let quoted = lexer.next_lexed_token().unwrap();
    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(expected_text));
}
5090
/// A quoted `${@}` followed by a pipeline inside `$(...)` is kept verbatim in
/// the text of the enclosing double-quoted word.
#[test]
fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
    let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
    let expected_text = r#"$(echo "${@}" | tr -d '[:space:]')"#;
    let mut lexer = Lexer::new(source);

    let quoted = lexer.next_lexed_token().unwrap();
    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(expected_text));
}
5103
/// Four levels of quoted command substitution around `${name}` still lex as
/// one `QuotedWord` with the full inner text.
#[test]
fn test_deep_command_substitution_preserves_simple_parameter_expansion() {
    let source = r#""$(echo "$(echo "$(echo "$(echo "${name}")")")")""#;
    let expected_text = r#"$(echo "$(echo "$(echo "$(echo "${name}")")")")"#;
    let mut lexer = Lexer::new(source);

    let quoted = lexer.next_lexed_token().unwrap();
    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(expected_text));
}
5116
/// A `)` buried in the operand of deeply nested `${..:-..}` expansions does
/// not close the surrounding command substitution.
#[test]
fn test_command_substitution_preserves_deep_parameter_operand_paren() {
    let source = r#""$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")""#;
    let expected_text = r#"$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")"#;
    let mut lexer = Lexer::new(source);

    let quoted = lexer.next_lexed_token().unwrap();
    assert_eq!(quoted.kind, TokenKind::QuotedWord);
    assert_eq!(quoted.word_text(), Some(expected_text));
}
5129
/// A word glued together from plain, double-quoted and single-quoted parts
/// keeps one segment per part, and the plain part stays source-backed.
#[test]
fn test_mixed_word_keeps_segment_kinds() {
    let source = r#"foo"bar"'baz'"#;
    let mut lexer = Lexer::new(source);

    let mixed = lexer.next_lexed_token().unwrap();
    assert_eq!(mixed.kind, TokenKind::Word);

    let word = mixed.word().unwrap();

    let expected_segments = vec![
        (LexedWordSegmentKind::Plain, "foo".to_string()),
        (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
        (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
    ];
    let actual_segments: Vec<_> = word
        .segments()
        .map(|segment| (segment.kind(), segment.as_str().to_string()))
        .collect();
    assert_eq!(actual_segments, expected_segments);

    assert_eq!(word.joined_text(), "foobarbaz");

    let first_span = word
        .segments()
        .next()
        .and_then(LexedWordSegment::span)
        .unwrap();
    assert_eq!(first_span.slice(source), "foo");
}
5162
/// A `<<-EOF` heredoc piped into another command is scanned through to the
/// closing `)` of the substitution.
#[test]
fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
    let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("field, direction"));
    assert!(scanned.ends_with(')'));
}
5173
/// A comment that starts right after `;` may contain `)` without ending the
/// substitution body.
#[test]
fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
    let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("printf '%s' y"));
    assert!(scanned.ends_with(')'));
}
5184
/// A `# ...` comment directly after a grouping `(` may contain `)` without
/// closing the group or the substitution.
#[test]
fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
    let source = " (# comment with )\nprintf %s 1,2\n) )\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("printf %s 1,2"));
    assert!(scanned.ends_with(')'));
}
5195
/// A heredoc delimiter immediately followed by `|` (no space) is still
/// recognised, so the heredoc body is skipped correctly.
#[test]
fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
    let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("field, direction"));
    assert!(scanned.ends_with(')'));
}
5206
/// A `)` inside a `${x//foo/)}` replacement does not end the substitution body.
#[test]
fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
    let source = "printf %s ${x//foo/)},1)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("${x//foo/)},1"));
    assert!(scanned.ends_with(')'));
}
5217
/// A comment directly after a case pattern's `)` may mention `esac` and `)`
/// without confusing the scanner.
#[test]
fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
    let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("printf %s 1,2"));
    assert!(scanned.ends_with(')'));
}
5228
/// The `#` of a zsh `(#b)` glob flag is not treated as a comment start.
#[test]
fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
    let source = "[[ \"$buf\" == (#b)(*) ]]";
    let hash_at = source.find('#').expect("expected hash");

    let is_comment = hash_starts_comment(source, hash_at);
    assert!(!is_comment);
}
5236
/// `(#comment` — a hash directly after `(` followed by ordinary text — is a comment.
#[test]
fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
    let source = "(#comment with )";
    let hash_at = source.find('#').expect("expected hash");

    let is_comment = hash_starts_comment(source, hash_at);
    assert!(is_comment);
}
5244
/// A `#` that appears after an opening `((` is not a comment start.
#[test]
fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
    let source = "(( #c < 256 ))";
    let hash_at = source.find('#').expect("expected hash");

    let is_comment = hash_starts_comment(source, hash_at);
    assert!(!is_comment);
}
5252
/// A `((` inside single quotes does not count as an arithmetic opener, so a
/// later ` #` still starts a comment.
#[test]
fn test_hash_starts_comment_respects_quoted_double_parens() {
    let source = "printf '((' # comment";
    let hash_at = source.find('#').expect("expected hash");

    let is_comment = hash_starts_comment(source, hash_at);
    assert!(is_comment);
}
5260
/// A quoted `((` before a comment does not stop the comment (and the `)` in
/// it) from being skipped.
#[test]
fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
    let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("printf %s 1,2"));
    assert!(scanned.ends_with(')'));
}
5271
/// A `(#comment ...)` comment with no space after the hash is skipped, `)`
/// included, while scanning the body.
#[test]
fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
    let source = " (#comment with )\nprintf %s 1,2\n) )\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("printf %s 1,2"));
    assert!(scanned.ends_with(')'));
}
5282
/// `<<` inside `(( ... ))` is a shift operator, not a heredoc opener, so the
/// following lines are scanned as ordinary commands.
#[test]
fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
    let source = "((x<<2))\nprintf %s 1,2\n)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("printf %s 1,2"));
    assert!(scanned.ends_with(')'));
}
5293
/// Inside a subshell group, the `)` of a case pattern is not taken for the
/// group's closer: the body ends with the real `))`.
#[test]
fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
    let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("printf %s 1,2"));
    assert!(scanned.ends_with("))"));
}
5304
/// `case` and `in` used as plain arguments do not open a case construct.
#[test]
fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
    let source = "printf %s 1,2; echo case in)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("echo case in"));
    assert!(scanned.ends_with(')'));
}
5315
/// An escaped `'` inside `$'...'` does not terminate the ANSI-C quoted string.
#[test]
fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
    let source = "printf %s $'a\\'b'; printf %s 1,2)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    for needle in ["$'a\\'b'", "printf %s 1,2"] {
        assert!(scanned.contains(needle));
    }
    assert!(scanned.ends_with(')'));
}
5327
/// A `)` inside a backtick substitution does not end the `$(...)` body.
#[test]
fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
    let source = "printf %s `echo foo)`; printf %s ok)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    for needle in ["`echo foo)`", "printf %s ok"] {
        assert!(scanned.contains(needle));
    }
    assert!(scanned.ends_with(')'));
}
5339
/// A `}` inside backticks within `${...}` does not close the parameter
/// expansion, so the later `)` in the replacement is still skipped.
#[test]
fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
    let source = "printf %s ${x/`echo }`/foo)},1)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("${x/`echo }`/foo)},1"));
    assert!(scanned.ends_with(')'));
}
5350
/// A `}` inside `<(...)` within `${...}` does not close the parameter
/// expansion, so the later `)` in the replacement is still skipped.
#[test]
fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
{
    let source = "printf %s ${x/<(echo })/foo)},1)\"";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert!(scanned.contains("${x/<(echo })/foo)},1"));
    assert!(scanned.ends_with(')'));
}
5362
/// With the closing `)` as the very last byte, plain `case in` words still
/// scan to the end of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
    let source = "printf %s 1,2; echo case in)";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert_eq!(scanned, source);
}
5372
/// With the closing `)` as the very last byte, an ANSI-C string containing an
/// escaped quote still scans to the end of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
    let source = "printf %s $'a\\'b'; printf %s 1,2)";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert_eq!(scanned, source);
}
5382
/// With the closing `)` as the very last byte, a backtick body containing `)`
/// still scans to the end of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
    let source = "printf %s `echo foo)`; printf %s ok)";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert_eq!(scanned, source);
}
5392
/// With the closing `)` as the very last byte, a pipeline with quoted
/// arguments still scans to the end of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
    let source = "echo \"$line\" | cut -d' ' -f2-)";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert_eq!(scanned, source);
}
5402
/// With the closing `)` as the very last byte, a pipeline using a quoted
/// `${@}` still scans to the end of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
    let source = "echo \"${@}\" | tr -d '[:space:]')";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert_eq!(scanned, source);
}
5412
/// With the closing `)` as the very last byte, a piped `<<-EOF` heredoc still
/// scans to the end of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
    let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert_eq!(scanned, source);
}
5422
/// With the closing `)` as the very last byte, a heredoc whose delimiter is
/// directly followed by `|` still scans to the end of the input.
#[test]
fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
    let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");
    let scanned = &source[..body_len];

    assert_eq!(scanned, source);
}
5432
/// A quoted `)` inside `$(...)` nested in `$(( ... ))` nested in another
/// `$(...)` does not cut the outer word short.
#[test]
fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
    let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
    let mut lexer = Lexer::new(source);

    let command = lexer.next_lexed_token().expect("expected first token");
    assert!(command.kind.is_word_like(), "{:?}", command.kind);
    assert_eq!(command.word_string().as_deref(), Some("echo"));

    let argument = lexer.next_lexed_token().expect("expected second token");
    assert!(argument.kind.is_word_like(), "{:?}", argument.kind);

    let expected_argument = "$(echo \"$(( $(printf ')') + 1 ))\")";
    assert_eq!(argument.word_string().as_deref(), Some(expected_argument));
}
5449
/// Escaped quotes surrounding the substitution do not affect where its body
/// ends: scanning from just after `$(` stops at the matching `)`.
#[test]
fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
    let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";

    // The body begins right after the two-byte `$(` opener.
    let opener_at = source.find("$(").expect("expected command substitution");
    let body_start = opener_at + 2;

    let body_len =
        scan_command_substitution_body_len(&source[body_start..]).expect("expected match");
    let body_end = body_start + body_len;

    assert_eq!(&source[body_start..body_end], "echo $var | tr A-Z a-z)");
}
5458
/// Scanning the outer body of `$(echo $(basename ...))` runs through the
/// nested substitution and includes both closing parens.
#[test]
fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
    let source = "echo $(echo $(basename $filename .fuzz))";

    // The body begins right after the first two-byte `$(` opener.
    let opener_at = source.find("$(").expect("expected command substitution");
    let body_start = opener_at + 2;

    let body_len =
        scan_command_substitution_body_len(&source[body_start..]).expect("expected match");
    let body_end = body_start + body_len;

    assert_eq!(
        &source[body_start..body_end],
        "echo $(basename $filename .fuzz))"
    );
}
5470
/// A `[[ ... ]] && echo "$(...)"` body is consumed in full, up to the final `)`.
#[test]
fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
    let source = "\n [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n )";

    let body_len = scan_command_substitution_body_len(source).expect("expected match");

    assert_eq!(body_len, source.len());
}
5477
/// `'foo'bar` is a `LiteralWord` with a single-quoted segment followed by a
/// plain, source-backed continuation segment.
#[test]
fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
    let source = "'foo'bar";
    let mut lexer = Lexer::new(source);

    let literal = lexer.next_lexed_token().unwrap();
    assert_eq!(literal.kind, TokenKind::LiteralWord);

    let word = literal.word().unwrap();

    let expected_segments = vec![
        (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
        (LexedWordSegmentKind::Plain, "bar".to_string()),
    ];
    let actual_segments: Vec<_> = word
        .segments()
        .map(|segment| (segment.kind(), segment.as_str().to_string()))
        .collect();
    assert_eq!(actual_segments, expected_segments);

    assert_eq!(word.joined_text(), "foobar");

    let continuation_span = word
        .segments()
        .nth(1)
        .and_then(LexedWordSegment::span)
        .unwrap();
    assert_eq!(continuation_span.slice(source), "bar");
}
5509
/// A bare `$(printf hi)` is one plain segment whose text and span both equal
/// the whole source.
#[test]
fn test_unquoted_command_substitution_word_keeps_source_backing() {
    let source = "$(printf hi)";
    let mut lexer = Lexer::new(source);

    let substitution = lexer.next_lexed_token().unwrap();
    assert_eq!(substitution.kind, TokenKind::Word);

    let only_segment = substitution.word().unwrap().single_segment().unwrap();
    assert_eq!(only_segment.kind(), LexedWordSegmentKind::Plain);
    assert_eq!(only_segment.as_str(), source);

    let backing = only_segment.span().unwrap().slice(source);
    assert_eq!(backing, source);
}
5524
/// A bare `${...}` with nested expansions in its subscript is one plain
/// segment whose text and span both equal the whole source.
#[test]
fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
    let source = "${arr[$RANDOM % ${#arr[@]}]}";
    let mut lexer = Lexer::new(source);

    let expansion = lexer.next_lexed_token().unwrap();
    assert_eq!(expansion.kind, TokenKind::Word);

    let only_segment = expansion.word().unwrap().single_segment().unwrap();
    assert_eq!(only_segment.kind(), LexedWordSegmentKind::Plain);
    assert_eq!(only_segment.as_str(), source);

    let backing = only_segment.span().unwrap().slice(source);
    assert_eq!(backing, source);
}
5539
/// After a double-quoted prefix, an unquoted `$(...)` continuation is a plain
/// segment that still slices back to its source text.
#[test]
fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
    let source = "\"foo\"$(printf hi)";
    let expected_tail = "$(printf hi)";
    let mut lexer = Lexer::new(source);

    let joined = lexer.next_lexed_token().unwrap();
    assert_eq!(joined.kind, TokenKind::Word);

    let word = joined.word().unwrap();
    let tail = word.segments().nth(1).unwrap();
    assert_eq!(tail.kind(), LexedWordSegmentKind::Plain);
    assert_eq!(tail.as_str(), expected_tail);
    assert_eq!(tail.span().unwrap().slice(source), expected_tail);
}
5554
/// A double-quoted `${...}` with nested expansions is one `DoubleQuoted`
/// segment whose text and span match the content between the quotes.
#[test]
fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
    let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
    let expected_inner = "${arr[$RANDOM % ${#arr[@]}]}";
    let mut lexer = Lexer::new(source);

    let quoted = lexer.next_lexed_token().unwrap();
    assert_eq!(quoted.kind, TokenKind::QuotedWord);

    let only_segment = quoted.word().unwrap().single_segment().unwrap();
    assert_eq!(only_segment.kind(), LexedWordSegmentKind::DoubleQuoted);
    assert_eq!(only_segment.as_str(), expected_inner);

    let backing = only_segment.span().unwrap().slice(source);
    assert_eq!(backing, expected_inner);
}
5572
/// In `$'\c''` the `\c` escape consumes the following `'` as its operand; the
/// resulting word is the single byte `\x07`.
#[test]
fn test_ansi_c_control_escape_can_consume_quote() {
    let mut lexer = Lexer::new("echo $'\\c''");

    let expected_tokens = [
        (TokenKind::Word, "echo"),
        (TokenKind::LiteralWord, "\x07"),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, Some(text));
    }
    assert!(lexer.next_lexed_token().is_none());
}
5581
/// A `${var//'"'/'\"'}` replacement inside double quotes ends at its own
/// closing quote; the trailing newline is a separate token.
#[test]
fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
    let source = r#"out_line="${out_line//'"'/'\"'}"
"#;
    let mut lexer = Lexer::new(source);

    let expected_tokens = [
        (TokenKind::Word, Some(r#"out_line=${out_line//'"'/'"'}"#)),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
    assert!(lexer.next_lexed_token().is_none());
}
5596
/// After a `${var//'"'/'\"'}` assignment, the following `echo` and heredoc
/// commands are lexed as their own tokens rather than being absorbed.
#[test]
fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
    let source = r#"out_line="${out_line//'"'/'\"'}"
echo "Error: Missing python3!"
cat << 'EOF' > "${pywrapper}"
import os
EOF
"#;
    let mut lexer = Lexer::new(source);

    let expected_tokens = [
        (TokenKind::Word, Some(r#"out_line=${out_line//'"'/'"'}"#)),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("echo")),
        (TokenKind::QuotedWord, Some("Error: Missing python3!")),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("cat")),
        (TokenKind::HereDoc, None),
        (TokenKind::LiteralWord, Some("EOF")),
        (TokenKind::RedirectOut, None),
        (TokenKind::QuotedWord, Some("${pywrapper}")),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
}
5626
/// A `${var//\\/\\\\}` replacement is one `Word`: its span covers the raw
/// source, it has no direct source slice, and its cooked text has the
/// backslash escapes resolved.
#[test]
fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
    let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
    let mut lexer = Lexer::new(source);

    let assignment = lexer.next_lexed_token().unwrap();
    assert_eq!(assignment.kind, TokenKind::Word);

    let raw_text = assignment.span.slice(source);
    assert_eq!(raw_text, "crypt=${crypt//\\\\/\\\\\\\\}");
    assert!(assignment.source_slice(source).is_none());

    let cooked_text = assignment.word_string();
    assert_eq!(cooked_text.as_deref(), Some("crypt=${crypt//\\/\\\\}"));

    assert_next_token(&mut lexer, TokenKind::Newline, None);
    assert!(lexer.next_lexed_token().is_none());
}
5643
/// A literal `{` inside a `${var#pattern}` trim pattern does not make the
/// word run on: `fi`, `}` and the newlines after it are separate tokens.
#[test]
fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
    let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n if true; then\n txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n fi\n}\n";
    let mut lexer = Lexer::new(source);

    let expected_tokens = [
        (
            TokenKind::Word,
            Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
        ),
        (TokenKind::Newline, None),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("f")),
        (TokenKind::LeftParen, None),
        (TokenKind::RightParen, None),
        (TokenKind::LeftBrace, None),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("if")),
        (TokenKind::Word, Some("true")),
        (TokenKind::Semicolon, None),
        (TokenKind::Word, Some("then")),
        (TokenKind::Newline, None),
        (
            TokenKind::Word,
            Some(
                "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
            ),
        ),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("fi")),
        (TokenKind::Newline, None),
        (TokenKind::RightBrace, None),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
    assert!(lexer.next_lexed_token().is_none());
}
5680
/// A case arm whose pattern is a lone `{` is lexed as the word `{` followed
/// by `)`; the next arm and `esac` are tokenised normally.
#[test]
fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
    let source = "case \"$word\" in\n {) : ;;\n :) : ;;\nesac\n";
    let mut lexer = Lexer::new(source);

    let expected_tokens = [
        (TokenKind::Word, Some("case")),
        (TokenKind::QuotedWord, Some("$word")),
        (TokenKind::Word, Some("in")),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("{")),
        (TokenKind::RightParen, None),
        (TokenKind::Word, Some(":")),
        (TokenKind::DoubleSemicolon, None),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some(":")),
        (TokenKind::RightParen, None),
        (TokenKind::Word, Some(":")),
        (TokenKind::DoubleSemicolon, None),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("esac")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
    assert!(lexer.next_lexed_token().is_none());
}
5704
/// A regex operand `^{` inside `[[ ... =~ ... ]]` ends at the whitespace, so
/// `]]`, `;` and `then` remain separate tokens.
#[test]
fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
    let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
    let mut lexer = Lexer::new(source);

    let expected_tokens = [
        (TokenKind::Word, Some("if")),
        (TokenKind::DoubleLeftBracket, None),
        (TokenKind::Word, Some("$MOTD")),
        (TokenKind::DoubleRightBracket, None),
        (TokenKind::And, None),
        (TokenKind::Word, Some("!")),
        (TokenKind::DoubleLeftBracket, None),
        (TokenKind::Word, Some("$MOTD")),
        (TokenKind::Word, Some("=~")),
        (TokenKind::Word, Some("^{")),
        (TokenKind::DoubleRightBracket, None),
        (TokenKind::Semicolon, None),
        (TokenKind::Word, Some("then")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
    assert!(lexer.next_lexed_token().is_none());
}
5726
/// A brace expansion in the middle of a word, with a `$(...)` member
/// containing a space, stays one `Word`.
#[test]
fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
    let source = "echo -{$(echo a),b}-\n";
    let mut lexer = Lexer::new(source);

    let expected_tokens = [
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some("-{$(echo a),b}-")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
    assert!(lexer.next_lexed_token().is_none());
}
5737
/// A brace expansion in the middle of a word, with a `$((...))` member
/// containing spaces, stays one `Word`.
#[test]
fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
    let source = "echo -{$((1 + 2)),b}-\n";
    let mut lexer = Lexer::new(source);

    let expected_tokens = [
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some("-{$((1 + 2)),b}-")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
    assert!(lexer.next_lexed_token().is_none());
}
5748
/// Each control operator (`|&`, `|`, `&&`, `||`, `;`, `&`) gets its own token
/// kind between the surrounding words.
#[test]
fn test_operators() {
    let mut lexer = Lexer::new("a |& b | c && d || e; f &");

    let expected_tokens = [
        (TokenKind::Word, Some("a")),
        (TokenKind::PipeBoth, None),
        (TokenKind::Word, Some("b")),
        (TokenKind::Pipe, None),
        (TokenKind::Word, Some("c")),
        (TokenKind::And, None),
        (TokenKind::Word, Some("d")),
        (TokenKind::Or, None),
        (TokenKind::Word, Some("e")),
        (TokenKind::Semicolon, None),
        (TokenKind::Word, Some("f")),
        (TokenKind::Background, None),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
    assert!(lexer.next_lexed_token().is_none());
}
5767
/// `[[` is only an operator when followed by a separator; `[[z]` is an
/// ordinary word.
#[test]
fn test_double_left_bracket_requires_separator() {
    let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");

    let expected_tokens = [
        (TokenKind::DoubleLeftBracket, None),
        (TokenKind::Word, Some("foo")),
        (TokenKind::DoubleRightBracket, None),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("[[z]")),
        (TokenKind::Newline, None),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
    assert!(lexer.next_lexed_token().is_none());
}
5780
/// Walks a line containing every redirect operator under test and checks the
/// token kind of each one; `2>|` is additionally checked for its fd value.
#[test]
fn test_redirects() {
    let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");

    // Everything up to (but excluding) the `2>|` operator.
    let before_clobber = [
        (TokenKind::Word, Some("a")),
        (TokenKind::RedirectOut, None),
        (TokenKind::Word, Some("b")),
        (TokenKind::RedirectAppend, None),
        (TokenKind::Word, Some("c")),
        (TokenKind::RedirectAppend, None),
        (TokenKind::Word, Some("d")),
        (TokenKind::RedirectFdAppend, None),
        (TokenKind::Word, Some("e")),
    ];
    for (kind, text) in before_clobber {
        assert_next_token(&mut lexer, kind, text);
    }

    // `2>|` is inspected by hand so the file descriptor can be verified too.
    let clobber = lexer.next_lexed_token().unwrap();
    assert_eq!(clobber.kind, TokenKind::Clobber);
    assert_eq!(clobber.fd_value(), Some(2));
    assert_eq!(token_text(&clobber, lexer.input), None);

    let after_clobber = [
        (TokenKind::Word, Some("f")),
        (TokenKind::RedirectIn, None),
        (TokenKind::Word, Some("g")),
        (TokenKind::HereDoc, None),
        (TokenKind::Word, Some("h")),
        (TokenKind::HereString, None),
        (TokenKind::Word, Some("i")),
        (TokenKind::RedirectBothAppend, None),
        (TokenKind::Word, Some("j")),
        (TokenKind::RedirectReadWrite, None),
        (TokenKind::Word, Some("k")),
    ];
    for (kind, text) in after_clobber {
        assert_next_token(&mut lexer, kind, text);
    }
}
5810
/// `next_lexed_token` drops a trailing `# ...` comment: the newline follows
/// the last word directly.
#[test]
fn test_comment() {
    let mut lexer = Lexer::new("echo hello # this is a comment\necho world");

    let expected_tokens = [
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some("hello")),
        (TokenKind::Newline, None),
        (TokenKind::Word, Some("echo")),
        (TokenKind::Word, Some("world")),
    ];
    for (kind, text) in expected_tokens {
        assert_next_token(&mut lexer, kind, text);
    }
}
5821
5822 #[test]
5823 fn test_comment_token_with_span() {
5824 let mut lexer = Lexer::new("# lead\necho hi # tail");
5825
5826 let comment = lexer.next_lexed_token_with_comments().unwrap();
5827 assert_eq!(comment.kind, TokenKind::Comment);
5828 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5829 assert_eq!(comment.span.start.line, 1);
5830 assert_eq!(comment.span.start.column, 1);
5831 assert_eq!(comment.span.end.line, 1);
5832 assert_eq!(comment.span.end.column, 7);
5833
5834 assert_next_token(&mut lexer, TokenKind::Newline, None);
5835 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5836 assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5837
5838 let inline = lexer.next_lexed_token_with_comments().unwrap();
5839 assert_eq!(inline.kind, TokenKind::Comment);
5840 assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5841 assert_eq!(inline.span.start.line, 2);
5842 assert_eq!(inline.span.start.column, 9);
5843 }
5844
5845 #[test]
5846 fn test_comment_token_preserves_hash_boundaries() {
5847 let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5848
5849 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5850 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5851 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5852 assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5853 assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5854 assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5855 assert!(lexer.next_lexed_token_with_comments().is_none());
5856 }
5857
5858 #[test]
5859 fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5860 let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5861
5862 let mut saw_comment = false;
5863 while let Some(token) = lexer.next_lexed_token_with_comments() {
5864 if token.kind == TokenKind::Comment {
5865 saw_comment = true;
5866 break;
5867 }
5868 }
5869
5870 assert!(
5871 !saw_comment,
5872 "zsh inline glob controls inside [[ ]] should not lex as comments"
5873 );
5874 }
5875
5876 #[test]
5877 fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5878 let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5879
5880 let mut saw_comment = false;
5881 while let Some(token) = lexer.next_lexed_token_with_comments() {
5882 if token.kind == TokenKind::Comment {
5883 saw_comment = true;
5884 break;
5885 }
5886 }
5887
5888 assert!(
5889 !saw_comment,
5890 "zsh arithmetic char literals inside (( )) should not lex as comments"
5891 );
5892 }
5893
5894 #[test]
5895 fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
5896 let mut lexer = Lexer::new(
5897 "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
5898 );
5899
5900 assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5901 assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5902 assert_next_token(
5903 &mut lexer,
5904 TokenKind::LiteralWord,
5905 Some("\\e]133;C;cmdline_url=%s\\a"),
5906 );
5907 assert_next_token(
5908 &mut lexer,
5909 TokenKind::QuotedWord,
5910 Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5911 );
5912 assert_next_token(&mut lexer, TokenKind::Newline, None);
5913 }
5914
5915 #[test]
5916 fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
5917 let mut lexer = Lexer::new(
5918 "() {\n builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
5919 );
5920
5921 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5922 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5923 assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5924 assert_next_token(&mut lexer, TokenKind::Newline, None);
5925 assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5926 assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5927 assert_next_token(
5928 &mut lexer,
5929 TokenKind::LiteralWord,
5930 Some("\\e]133;C;cmdline_url=%s\\a"),
5931 );
5932 assert_next_token(
5933 &mut lexer,
5934 TokenKind::QuotedWord,
5935 Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5936 );
5937 assert_next_token(&mut lexer, TokenKind::Newline, None);
5938 assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5939 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
5940 assert_next_token(&mut lexer, TokenKind::Newline, None);
5941 }
5942
5943 #[test]
5944 fn test_variable_words() {
5945 let mut lexer = Lexer::new("echo $HOME $USER");
5946
5947 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5948 assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5949 assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5950 assert!(lexer.next_lexed_token().is_none());
5951 }
5952
5953 #[test]
5954 fn test_pipeline_tokens() {
5955 let mut lexer = Lexer::new("echo hello | cat");
5956
5957 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5958 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5959 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5960 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5961 assert!(lexer.next_lexed_token().is_none());
5962 }
5963
5964 #[test]
5965 fn test_read_heredoc() {
5966 let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5968 let content = lexer.read_heredoc("EOF", false);
5969 assert_eq!(content.content, "hello\nworld\n");
5970 }
5971
5972 #[test]
5973 fn test_read_heredoc_single_line() {
5974 let mut lexer = Lexer::new("\ntest\nEOF");
5975 let content = lexer.read_heredoc("EOF", false);
5976 assert_eq!(content.content, "test\n");
5977 }
5978
5979 #[test]
5980 fn test_read_heredoc_full_scenario() {
5981 let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5983
5984 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5986 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5987 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5988
5989 let content = lexer.read_heredoc("EOF", false);
5991 assert_eq!(content.content, "hello\nworld\n");
5992 }
5993
5994 #[test]
5995 fn test_read_heredoc_with_redirect() {
5996 let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5998 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5999 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6000 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6001 let content = lexer.read_heredoc("EOF", false);
6002 assert_eq!(content.content, "hello\n");
6003 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6005 assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
6006 }
6007
6008 #[test]
6009 fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
6010 let source = "cat <<EOF | grep hello \\\n | sort \\\n > out.txt\nhello\nEOF\n";
6011 let mut lexer = Lexer::new(source);
6012
6013 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6014 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6015 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6016
6017 let heredoc = lexer.read_heredoc("EOF", false);
6018 assert_eq!(heredoc.content, "hello\n");
6019
6020 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6021 assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
6022 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
6023 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6024 assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
6025 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6026 assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
6027 }
6028
6029 #[test]
6030 fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
6031 let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
6032 let mut lexer = Lexer::new(source);
6033
6034 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6035 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6036 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6037
6038 let heredoc = lexer.read_heredoc("EOF", false);
6039 assert_eq!(heredoc.content, "1\n2\n3\n");
6040 }
6041
6042 #[test]
6043 fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
6044 let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
6045 let mut lexer = Lexer::new(source);
6046
6047 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6048 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6049 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6050
6051 let heredoc = lexer.read_heredoc("EOF", false);
6052 assert_eq!(heredoc.content, "body\n");
6053 }
6054
6055 #[test]
6056 fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
6057 let source = "cat <<EOF # note \\\nbody\nEOF\n";
6058 let mut lexer = Lexer::new(source);
6059
6060 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6061 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6062 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6063
6064 let heredoc = lexer.read_heredoc("EOF", false);
6065 assert_eq!(heredoc.content, "body\n");
6066 }
6067
6068 #[test]
6069 fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
6070 let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
6071 let mut lexer = Lexer::new(source);
6072
6073 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6074 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6075 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6076 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6077
6078 let heredoc = lexer.read_heredoc("EOF", false);
6079 assert_eq!(heredoc.content, "body\n");
6080
6081 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6082 }
6083
6084 #[test]
6085 fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
6086 let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
6087 let mut lexer = Lexer::new(source);
6088
6089 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6090 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6091 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6092
6093 let heredoc = lexer.read_heredoc("EOF", false);
6094 assert_eq!(heredoc.content, "1\n");
6095
6096 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6097 assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
6098 }
6099
6100 #[test]
6101 fn test_read_heredoc_with_redirect_preserves_following_spans() {
6102 let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
6103 let mut lexer = Lexer::new(source);
6104
6105 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6106 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6107 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6108
6109 let heredoc = lexer.read_heredoc("EOF", false);
6110 assert_eq!(heredoc.content, "hello\n");
6111
6112 let redirect = lexer.next_lexed_token_with_comments().unwrap();
6113 assert_eq!(redirect.kind, TokenKind::RedirectOut);
6114 assert_eq!(redirect.span.slice(source), ">");
6115
6116 let target = lexer.next_lexed_token_with_comments().unwrap();
6117 assert_eq!(target.kind, TokenKind::Word);
6118 assert_eq!(
6119 token_text(&target, lexer.input).as_deref(),
6120 Some("file.txt")
6121 );
6122 assert_eq!(target.span.slice(source), "file.txt");
6123
6124 let newline = lexer.next_lexed_token_with_comments().unwrap();
6125 assert_eq!(newline.kind, TokenKind::Newline);
6126 assert_eq!(newline.span.slice(source), "\n");
6127
6128 let comment = lexer.next_lexed_token_with_comments().unwrap();
6129 assert_eq!(comment.kind, TokenKind::Comment);
6130 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
6131 assert_eq!(comment.span.slice(source), "# done");
6132 }
6133
6134 #[test]
6135 fn test_comment_with_unicode() {
6136 let source = "# café résumé\necho ok";
6138 let mut lexer = Lexer::new(source);
6139
6140 let comment = lexer.next_lexed_token_with_comments().unwrap();
6141 assert_eq!(comment.kind, TokenKind::Comment);
6142 assert_eq!(
6143 token_text(&comment, lexer.input).as_deref(),
6144 Some(" café résumé")
6145 );
6146 let start = comment.span.start.offset;
6148 let end = comment.span.end.offset;
6149 assert_eq!(start, 0);
6150 assert_eq!(&source[start..end], "# café résumé");
6151 assert!(source.is_char_boundary(start));
6152 assert!(source.is_char_boundary(end));
6153
6154 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6155 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
6156 }
6157
6158 #[test]
6159 fn test_comment_with_cjk_characters() {
6160 let source = "# 你好世界\necho ok";
6162 let mut lexer = Lexer::new(source);
6163
6164 let comment = lexer.next_lexed_token_with_comments().unwrap();
6165 assert_eq!(comment.kind, TokenKind::Comment);
6166 assert_eq!(
6167 token_text(&comment, lexer.input).as_deref(),
6168 Some(" 你好世界")
6169 );
6170 let start = comment.span.start.offset;
6171 let end = comment.span.end.offset;
6172 assert_eq!(&source[start..end], "# 你好世界");
6173 assert!(source.is_char_boundary(start));
6174 assert!(source.is_char_boundary(end));
6175 }
6176
6177 #[test]
6178 fn test_heredoc_with_comments_inside() {
6179 let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6181 let mut lexer = Lexer::new(source);
6182
6183 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6184 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6185 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6186
6187 let heredoc = lexer.read_heredoc("EOF", false);
6188 assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6189
6190 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6193 let comment = lexer.next_lexed_token_with_comments().unwrap();
6194 assert_eq!(comment.kind, TokenKind::Comment);
6195 assert_eq!(
6196 token_text(&comment, lexer.input).as_deref(),
6197 Some(" real comment")
6198 );
6199 }
6200
6201 #[test]
6202 fn test_heredoc_with_hash_in_variable() {
6203 let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6205 let mut lexer = Lexer::new(source);
6206
6207 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6208 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6209 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6210
6211 let heredoc = lexer.read_heredoc("EOF", false);
6212 assert_eq!(heredoc.content, "val=${x#prefix}\n");
6213 }
6214
6215 #[test]
6216 fn test_heredoc_span_does_not_leak() {
6217 let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6220 let mut lexer = Lexer::new(source);
6221
6222 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6223 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6224 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6225
6226 let heredoc = lexer.read_heredoc("EOF", false);
6227 let start = heredoc.content_span.start.offset;
6228 let end = heredoc.content_span.end.offset;
6229 assert!(
6230 end <= source.len(),
6231 "heredoc span end ({end}) exceeds source length ({})",
6232 source.len()
6233 );
6234 assert_eq!(&source[start..end], "hello\nworld\n");
6235
6236 assert_next_token(&mut lexer, TokenKind::Newline, None);
6238 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6239 assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6240 }
6241
6242 #[test]
6243 fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
6244 let source = "\
6245cat <<\\_ACEOF
6246Use these variables to override the choices made by `configure' or to help
6247it to find libraries and programs with nonstandard names/locations.
6248_ACEOF
6249ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
6250ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
6251";
6252 let mut lexer = Lexer::new(source);
6253
6254 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6255 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6256 let delimiter = lexer.next_lexed_token_with_comments().unwrap();
6257 assert_eq!(delimiter.kind, TokenKind::Word);
6258 assert_eq!(delimiter.span.slice(source), "\\_ACEOF");
6259
6260 let heredoc = lexer.read_heredoc("_ACEOF", false);
6261 assert_eq!(
6262 heredoc.content,
6263 "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
6264 );
6265
6266 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6267
6268 let first = lexer.next_lexed_token_with_comments().unwrap();
6269 assert_eq!(first.kind, TokenKind::Word);
6270 assert_eq!(
6271 first.span.slice(source),
6272 "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
6273 );
6274 let first_segments = first
6275 .word()
6276 .unwrap()
6277 .segments()
6278 .map(|segment| {
6279 (
6280 segment.kind(),
6281 segment.as_str().to_string(),
6282 segment.span().map(|span| span.slice(source).to_string()),
6283 )
6284 })
6285 .collect::<Vec<_>>();
6286 assert_eq!(
6287 first_segments,
6288 vec![
6289 (
6290 LexedWordSegmentKind::Plain,
6291 "ac_dir_suffix=/".to_string(),
6292 Some("ac_dir_suffix=/".to_string()),
6293 ),
6294 (
6295 LexedWordSegmentKind::Plain,
6296 "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
6297 Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
6298 ),
6299 ]
6300 );
6301
6302 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6303
6304 let second = lexer.next_lexed_token_with_comments().unwrap();
6305 assert_eq!(second.kind, TokenKind::Word);
6306 assert_eq!(
6307 second.span.slice(source),
6308 "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6309 );
6310 let second_segments = second
6311 .word()
6312 .unwrap()
6313 .segments()
6314 .map(|segment| {
6315 (
6316 segment.kind(),
6317 segment.as_str().to_string(),
6318 segment.span().map(|span| span.slice(source).to_string()),
6319 )
6320 })
6321 .collect::<Vec<_>>();
6322 assert_eq!(
6323 second_segments,
6324 vec![
6325 (
6326 LexedWordSegmentKind::Plain,
6327 "ac_top_builddir_sub=".to_string(),
6328 Some("ac_top_builddir_sub=".to_string()),
6329 ),
6330 (
6331 LexedWordSegmentKind::Plain,
6332 "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
6333 Some(
6334 "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6335 .to_string(),
6336 ),
6337 ),
6338 ]
6339 );
6340 }
6341
6342 #[test]
6343 fn test_heredoc_with_unicode_content() {
6344 let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6346 let mut lexer = Lexer::new(source);
6347
6348 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6349 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6350 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6351
6352 let heredoc = lexer.read_heredoc("EOF", false);
6353 assert_eq!(heredoc.content, "# 你好\ncafé\n");
6354 let start = heredoc.content_span.start.offset;
6355 let end = heredoc.content_span.end.offset;
6356 assert!(
6357 source.is_char_boundary(start),
6358 "heredoc span start ({start}) not on char boundary"
6359 );
6360 assert!(
6361 source.is_char_boundary(end),
6362 "heredoc span end ({end}) not on char boundary"
6363 );
6364 assert_eq!(&source[start..end], "# 你好\ncafé\n");
6365 }
6366
6367 #[test]
6368 fn test_assoc_compound_assignment() {
6369 let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6372 assert_next_token(
6373 &mut lexer,
6374 TokenKind::Word,
6375 Some(r#"m=([foo]="bar" [baz]="qux")"#),
6376 );
6377 assert!(lexer.next_lexed_token().is_none());
6378 }
6379
6380 #[test]
6381 fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6382 let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6383 let mut lexer = Lexer::new(source);
6384
6385 let token = lexer.next_lexed_token().unwrap();
6386 assert_eq!(token.kind, TokenKind::Word);
6387 assert_eq!(token.span.slice(source), source);
6388 assert!(lexer.next_lexed_token().is_none());
6389 }
6390
6391 #[test]
6392 fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6393 let source = r#"foo\_bar@(baz|qux)"#;
6394 let mut lexer = Lexer::new(source);
6395
6396 let token = lexer.next_lexed_token().unwrap();
6397 assert_eq!(token.kind, TokenKind::Word);
6398 assert_eq!(token.span.slice(source), source);
6399 assert!(lexer.next_lexed_token().is_none());
6400 }
6401
6402 #[test]
6403 fn test_indexed_array_not_collapsed() {
6404 let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6407 assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6408 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6409 }
6410
6411 #[test]
6412 fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6413 let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6414 let mut lexer = Lexer::new(source);
6415
6416 assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6417 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6418
6419 let token = lexer.next_lexed_token().unwrap();
6420 assert_eq!(token.kind, TokenKind::Word);
6421 assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6422
6423 let word = token.word().unwrap();
6424 let segments: Vec<_> = word
6425 .segments()
6426 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6427 .collect();
6428 assert_eq!(
6429 segments,
6430 vec![
6431 (
6432 LexedWordSegmentKind::DoubleQuoted,
6433 "$plugin_dir".to_string()
6434 ),
6435 (LexedWordSegmentKind::Plain, "/*".to_string()),
6436 (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6437 ]
6438 );
6439
6440 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6441 assert!(lexer.next_lexed_token().is_none());
6442 }
6443
6444 #[test]
6445 fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6446 let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6447 let mut lexer = Lexer::new(source);
6448
6449 assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6450 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6451
6452 let token = lexer.next_lexed_token().unwrap();
6453 assert_eq!(token.kind, TokenKind::Word);
6454 assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6455
6456 let word = token.word().unwrap();
6457 let segments: Vec<_> = word
6458 .segments()
6459 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6460 .collect();
6461 assert_eq!(
6462 segments,
6463 vec![
6464 (
6465 LexedWordSegmentKind::DoubleQuoted,
6466 "$__GREP_CACHE_FILE".to_string()
6467 ),
6468 (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6469 ]
6470 );
6471
6472 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6473 assert!(lexer.next_lexed_token().is_none());
6474 }
6475
6476 #[test]
6477 fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6478 let source = r#"$dir/${~pats}(N)"#;
6479 let mut lexer = Lexer::new(source);
6480
6481 let token = lexer.next_lexed_token().unwrap();
6482 assert_eq!(token.kind, TokenKind::Word);
6483 assert_eq!(token.span.slice(source), source);
6484 assert!(lexer.next_lexed_token().is_none());
6485 }
6486
6487 #[test]
6488 fn test_dollar_word_does_not_absorb_function_parens() {
6489 let mut lexer = Lexer::new(r#"foo$x()"#);
6490
6491 assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6492 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6493 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6494 assert!(lexer.next_lexed_token().is_none());
6495 }
6496
6497 #[test]
6498 fn test_command_substitution_word_does_not_absorb_function_parens() {
6499 let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6500
6501 assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6502 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6503 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6504 assert!(lexer.next_lexed_token().is_none());
6505 }
6506
6507 #[test]
6510 fn test_digit_at_eof_no_panic() {
6511 let mut lexer = Lexer::new("2");
6513 let token = lexer.next_lexed_token();
6514 assert!(token.is_some());
6515 }
6516
6517 #[test]
6519 fn test_nested_brace_expansion_single_token() {
6520 let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6522 assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6523 assert!(lexer.next_lexed_token().is_none());
6525 }
6526
6527 #[test]
6529 fn test_simple_brace_expansion_unchanged() {
6530 let mut lexer = Lexer::new("${foo}");
6531 assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6532 assert!(lexer.next_lexed_token().is_none());
6533 }
6534
6535 #[test]
6536 fn test_nvm_fixture_lexes_without_stalling() {
6537 let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6538 let mut lexer = Lexer::new(input);
6539 let mut tokens = 0usize;
6540
6541 while lexer.next_lexed_token().is_some() {
6542 tokens += 1;
6543 assert!(
6544 tokens < 100_000,
6545 "lexer should continue making progress on the nvm fixture"
6546 );
6547 }
6548
6549 assert!(tokens > 0, "nvm fixture should produce at least one token");
6550 }
6551
6552 #[test]
6553 fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6554 let input = concat!(
6555 "case \"${_input_type:-}\" in\n",
6556 " html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6557 " org) _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6558 "esac\n",
6559 );
6560
6561 assert_non_newline_tokens_stay_on_one_line(input);
6562
6563 let mut lexer = Lexer::new(input);
6564 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6565 .map(|token| (token.kind, token_text(&token, input)))
6566 .collect::<Vec<_>>();
6567 assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6568 assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6569 }
6570
6571 #[test]
6572 fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6573 let input = concat!(
6574 "case $2 in\n",
6575 " cygwin*) bin='cygwin32/bin' ;|\n",
6576 "esac\n",
6577 );
6578
6579 let mut lexer = Lexer::new(input);
6580 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6581 .map(|token| (token.kind, token_text(&token, input)))
6582 .collect::<Vec<_>>();
6583
6584 assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6585 assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6586 assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6587 }
6588
6589 #[test]
6590 fn test_inline_if_with_array_append_stays_line_local() {
6591 let input = concat!(
6592 "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6593 "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6594 );
6595
6596 assert_non_newline_tokens_stay_on_one_line(input);
6597 }
6598
6599 #[test]
6600 fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6601 let source = "unsetopt interactive_comments\n#literal\n";
6602 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6603 let mut lexer = Lexer::with_profile(source, &profile);
6604
6605 assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6606 assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6607 assert_next_token(&mut lexer, TokenKind::Newline, None);
6608 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6609 }
6610
6611 #[test]
6612 fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6613 let source = "setopt rc_quotes\nprint 'a''b'\n";
6614 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6615 let mut lexer = Lexer::with_profile(source, &profile);
6616
6617 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6618 assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6619 assert_next_token(&mut lexer, TokenKind::Newline, None);
6620 assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6621 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6622 }
6623
6624 #[test]
6625 fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6626 let source = "setopt ignore_braces\n{ echo }\n";
6627 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6628 let mut lexer = Lexer::with_profile(source, &profile);
6629
6630 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6631 assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6632 assert_next_token(&mut lexer, TokenKind::Newline, None);
6633 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6634 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6635 assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6636 }
6637
6638 #[test]
6639 fn test_heredoc_in_arithmetic_fuzz_crash() {
6640 let data: &[u8] = &[
6644 35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
6645 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
6646 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
6647 119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
6648 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
6649 119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
6650 122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6651 122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6652 122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
6653 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
6654 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
6655 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
6656 61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
6657 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
6658 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
6659 39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
6660 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
6661 32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
6662 0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
6663 ];
6664 let input = std::str::from_utf8(data).unwrap();
6665 let script = format!("echo $(({input}))\n");
6666 let _ = crate::parser::Parser::new(&script).parse();
6668 }
6669}