1use std::{collections::VecDeque, ops::Range, sync::Arc};
6
7use memchr::{memchr, memchr_iter, memrchr};
8use shuck_ast::{Position, Span, TokenKind};
9use smallvec::SmallVec;
10
11use super::{ShellProfile, ZshOptionState, ZshOptionTimeline};
12
/// Compact bit set recording how a lexed token's text was produced.
///
/// The default value has no bits set.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub(crate) struct TokenFlags(u8);

impl TokenFlags {
    /// Bit marking a token whose word payload holds owned (non-source) text.
    const COOKED_TEXT: u8 = 0b01;
    /// Bit marking a token flagged through `with_synthetic`.
    const SYNTHETIC: u8 = 0b10;

    /// Returns a flag set with no bits set.
    const fn empty() -> Self {
        TokenFlags(0)
    }

    /// Returns a flag set with only the cooked-text bit set.
    const fn cooked_text() -> Self {
        TokenFlags(Self::COOKED_TEXT)
    }

    /// Reports whether any bit of `mask` is set.
    const fn contains(self, mask: u8) -> bool {
        (self.0 & mask) != 0
    }

    /// Returns a copy of these flags with the synthetic bit added.
    pub(crate) const fn with_synthetic(self) -> Self {
        TokenFlags(Self::SYNTHETIC | self.0)
    }

    /// Returns `true` when the cooked-text bit is set.
    pub(crate) const fn has_cooked_text(self) -> bool {
        self.contains(Self::COOKED_TEXT)
    }

    /// Returns `true` when the synthetic bit is set.
    pub(crate) const fn is_synthetic(self) -> bool {
        self.contains(Self::SYNTHETIC)
    }
}
40
/// Text carried by a lexed word segment, stored in one of three forms.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenText<'a> {
    /// A string slice borrowed for the lifetime `'a`.
    Borrowed(&'a str),
    /// A byte range into a reference-counted string; `as_str` yields
    /// `&source[range]`.
    Shared {
        source: Arc<str>,
        range: Range<usize>,
    },
    /// Text held in its own heap allocation.
    Owned(String),
}
50
51impl TokenText<'_> {
52 pub(crate) fn as_str(&self) -> &str {
53 match self {
54 Self::Borrowed(text) => text,
55 Self::Shared { source, range } => &source[range.clone()],
56 Self::Owned(text) => text,
57 }
58 }
59
60 fn into_owned<'a>(self) -> TokenText<'a> {
61 match self {
62 Self::Borrowed(text) => TokenText::Owned(text.to_string()),
63 Self::Shared { source, range } => TokenText::Shared { source, range },
64 Self::Owned(text) => TokenText::Owned(text),
65 }
66 }
67
68 fn into_shared<'a>(self, source: &Arc<str>, span: Option<Span>) -> TokenText<'a> {
69 match self {
70 Self::Borrowed(text) => span
71 .filter(|span| span.end.offset <= source.len())
72 .map_or_else(
73 || TokenText::Owned(text.to_string()),
74 |span| TokenText::Shared {
75 source: Arc::clone(source),
76 range: span.start.offset..span.end.offset,
77 },
78 ),
79 Self::Shared { source, range } => TokenText::Shared { source, range },
80 Self::Owned(text) => TokenText::Owned(text),
81 }
82 }
83}
84
/// Classification of one segment of a lexed word.
///
/// `LexedToken::word_segment_kind` maps `TokenKind::Word` to `Plain`,
/// `TokenKind::LiteralWord` to `SingleQuoted`, `TokenKind::QuotedWord` to
/// `DoubleQuoted`, and every other kind to `Composite`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexedWordSegmentKind {
    /// Unquoted text.
    Plain,
    /// Single-quoted text.
    SingleQuoted,
    /// Presumably `$'...'` text -- TODO confirm against the word reader.
    DollarSingleQuoted,
    /// Double-quoted text.
    DoubleQuoted,
    /// Presumably `$"..."` text -- TODO confirm against the word reader.
    DollarDoubleQuoted,
    /// Fallback kind for tokens that are none of the simple word kinds.
    Composite,
}
101
/// One piece of a lexed word: its classification, text and optional spans.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct LexedWordSegment<'a> {
    /// How this segment was written (plain, quoted, ...).
    kind: LexedWordSegmentKind,
    /// The segment's text.
    text: TokenText<'a>,
    /// Span associated with `text`, when known; `into_shared` uses its
    /// offsets to slice the shared source.
    span: Option<Span>,
    /// Secondary span reported by `wrapper_span()`, which falls back to
    /// `span` when this is `None`. Presumably covers surrounding quote
    /// characters -- TODO confirm against the code that builds segments.
    wrapper_span: Option<Span>,
}
110
111impl<'a> LexedWordSegment<'a> {
112 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
113 Self {
114 kind,
115 text: TokenText::Borrowed(text),
116 span,
117 wrapper_span: span,
118 }
119 }
120
121 fn borrowed_with_spans(
122 kind: LexedWordSegmentKind,
123 text: &'a str,
124 span: Option<Span>,
125 wrapper_span: Option<Span>,
126 ) -> Self {
127 Self {
128 kind,
129 text: TokenText::Borrowed(text),
130 span,
131 wrapper_span,
132 }
133 }
134
135 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
136 Self {
137 kind,
138 text: TokenText::Owned(text),
139 span: None,
140 wrapper_span: None,
141 }
142 }
143
144 fn owned_with_spans(
145 kind: LexedWordSegmentKind,
146 text: String,
147 span: Option<Span>,
148 wrapper_span: Option<Span>,
149 ) -> Self {
150 Self {
151 kind,
152 text: TokenText::Owned(text),
153 span,
154 wrapper_span,
155 }
156 }
157
158 pub(crate) fn as_str(&self) -> &str {
160 self.text.as_str()
161 }
162
163 pub(crate) const fn text_is_source_backed(&self) -> bool {
164 matches!(self.text, TokenText::Borrowed(_) | TokenText::Shared { .. })
165 }
166
167 pub(crate) const fn kind(&self) -> LexedWordSegmentKind {
169 self.kind
170 }
171
172 pub(crate) const fn span(&self) -> Option<Span> {
174 self.span
175 }
176
177 pub(crate) fn wrapper_span(&self) -> Option<Span> {
179 self.wrapper_span.or(self.span)
180 }
181
182 fn rebased(mut self, base: Position) -> Self {
183 self.span = self.span.map(|span| span.rebased(base));
184 self.wrapper_span = self.wrapper_span.map(|span| span.rebased(base));
185 self
186 }
187
188 fn into_owned<'b>(self) -> LexedWordSegment<'b> {
189 LexedWordSegment {
190 kind: self.kind,
191 text: self.text.into_owned(),
192 span: self.span,
193 wrapper_span: self.wrapper_span,
194 }
195 }
196
197 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWordSegment<'b> {
198 LexedWordSegment {
199 kind: self.kind,
200 text: self.text.into_shared(source, self.span),
201 span: self.span,
202 wrapper_span: self.wrapper_span,
203 }
204 }
205}
206
/// A lexed word made of one or more segments.
///
/// The first segment is stored inline so there is always at least one;
/// further segments live in `trailing_segments`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct LexedWord<'a> {
    /// The first segment of the word.
    primary_segment: LexedWordSegment<'a>,
    /// Segments following the first one, in order; empty for a
    /// single-segment word.
    trailing_segments: Vec<LexedWordSegment<'a>>,
}
213
214impl<'a> LexedWord<'a> {
215 fn from_segment(primary_segment: LexedWordSegment<'a>) -> Self {
216 Self {
217 primary_segment,
218 trailing_segments: Vec::new(),
219 }
220 }
221
222 fn borrowed(kind: LexedWordSegmentKind, text: &'a str, span: Option<Span>) -> Self {
223 Self::from_segment(LexedWordSegment::borrowed(kind, text, span))
224 }
225
226 fn owned(kind: LexedWordSegmentKind, text: String) -> Self {
227 Self::from_segment(LexedWordSegment::owned(kind, text))
228 }
229
230 fn push_segment(&mut self, segment: LexedWordSegment<'a>) {
231 self.trailing_segments.push(segment);
232 }
233
234 pub(crate) fn segments(&self) -> impl Iterator<Item = &LexedWordSegment<'a>> {
236 std::iter::once(&self.primary_segment).chain(self.trailing_segments.iter())
237 }
238
239 pub(crate) fn text(&self) -> Option<&str> {
241 self.single_segment().map(LexedWordSegment::as_str)
242 }
243
244 pub(crate) fn joined_text(&self) -> String {
246 let mut text = String::new();
247 for segment in self.segments() {
248 text.push_str(segment.as_str());
249 }
250 text
251 }
252
253 pub(crate) fn single_segment(&self) -> Option<&LexedWordSegment<'a>> {
255 self.trailing_segments
256 .is_empty()
257 .then_some(&self.primary_segment)
258 }
259
260 fn has_cooked_text(&self) -> bool {
261 self.segments()
262 .any(|segment| matches!(segment.text, TokenText::Owned(_)))
263 }
264
265 fn rebased(mut self, base: Position) -> Self {
266 self.primary_segment = self.primary_segment.rebased(base);
267 self.trailing_segments = self
268 .trailing_segments
269 .into_iter()
270 .map(|segment| segment.rebased(base))
271 .collect();
272 self
273 }
274
275 fn into_owned<'b>(self) -> LexedWord<'b> {
276 LexedWord {
277 primary_segment: self.primary_segment.into_owned(),
278 trailing_segments: self
279 .trailing_segments
280 .into_iter()
281 .map(LexedWordSegment::into_owned)
282 .collect(),
283 }
284 }
285
286 fn into_shared<'b>(self, source: &Arc<str>) -> LexedWord<'b> {
287 LexedWord {
288 primary_segment: self.primary_segment.into_shared(source),
289 trailing_segments: self
290 .trailing_segments
291 .into_iter()
292 .map(|segment| segment.into_shared(source))
293 .collect(),
294 }
295 }
296}
297
/// The kinds of unterminated construct the lexer can report.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum LexerErrorKind {
    /// A command substitution that was never closed.
    CommandSubstitution,
    /// A backtick substitution that was never closed.
    BacktickSubstitution,
    /// A single-quoted string that was never closed.
    SingleQuote,
    /// A double-quoted string that was never closed.
    DoubleQuote,
}

impl LexerErrorKind {
    /// Returns the fixed human-readable description of this error kind.
    pub(crate) const fn message(self) -> &'static str {
        let description: &'static str = match self {
            LexerErrorKind::SingleQuote => "unterminated single quote",
            LexerErrorKind::DoubleQuote => "unterminated double quote",
            LexerErrorKind::BacktickSubstitution => "unterminated backtick substitution",
            LexerErrorKind::CommandSubstitution => "unterminated command substitution",
        };
        description
    }
}
322
/// Data attached to a `LexedToken` in addition to its kind, span and flags.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum TokenPayload<'a> {
    /// No extra data; used by the punctuation and comment constructors.
    None,
    /// The segmented text of a word token.
    Word(LexedWord<'a>),
    /// A single file-descriptor number.
    Fd(i32),
    /// Two file-descriptor numbers, in the `(src_fd, dst_fd)` order used by
    /// the `fd_pair` constructor.
    FdPair(i32, i32),
    /// The failure carried by an error token.
    Error(LexerErrorKind),
}
331
/// A token produced by the lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexedToken<'a> {
    /// The token's kind.
    pub kind: TokenKind,
    /// The token's span; constructors start with `Span::new()` and callers
    /// attach the real span with `with_span`.
    pub span: Span,
    /// Flags describing the token's text (cooked and/or synthetic).
    pub(crate) flags: TokenFlags,
    /// Kind-specific data: word text, descriptor numbers, or an error kind.
    payload: TokenPayload<'a>,
}
342
343impl<'a> LexedToken<'a> {
344 fn word_segment_kind(kind: TokenKind) -> LexedWordSegmentKind {
345 match kind {
346 TokenKind::Word => LexedWordSegmentKind::Plain,
347 TokenKind::LiteralWord => LexedWordSegmentKind::SingleQuoted,
348 TokenKind::QuotedWord => LexedWordSegmentKind::DoubleQuoted,
349 _ => LexedWordSegmentKind::Composite,
350 }
351 }
352
353 pub(crate) fn punctuation(kind: TokenKind) -> Self {
354 Self {
355 kind,
356 span: Span::new(),
357 flags: TokenFlags::empty(),
358 payload: TokenPayload::None,
359 }
360 }
361
362 fn with_word_payload(kind: TokenKind, word: LexedWord<'a>) -> Self {
363 let flags = if word.has_cooked_text() {
364 TokenFlags::cooked_text()
365 } else {
366 TokenFlags::empty()
367 };
368
369 Self {
370 kind,
371 span: Span::new(),
372 flags,
373 payload: TokenPayload::Word(word),
374 }
375 }
376
377 fn borrowed_word(kind: TokenKind, text: &'a str, text_span: Option<Span>) -> Self {
378 Self::with_word_payload(
379 kind,
380 LexedWord::borrowed(Self::word_segment_kind(kind), text, text_span),
381 )
382 }
383
384 fn owned_word(kind: TokenKind, text: String) -> Self {
385 Self::with_word_payload(kind, LexedWord::owned(Self::word_segment_kind(kind), text))
386 }
387
388 fn comment() -> Self {
389 Self {
390 kind: TokenKind::Comment,
391 span: Span::new(),
392 flags: TokenFlags::empty(),
393 payload: TokenPayload::None,
394 }
395 }
396
397 fn fd(kind: TokenKind, fd: i32) -> Self {
398 Self {
399 kind,
400 span: Span::new(),
401 flags: TokenFlags::empty(),
402 payload: TokenPayload::Fd(fd),
403 }
404 }
405
406 fn fd_pair(kind: TokenKind, src_fd: i32, dst_fd: i32) -> Self {
407 Self {
408 kind,
409 span: Span::new(),
410 flags: TokenFlags::empty(),
411 payload: TokenPayload::FdPair(src_fd, dst_fd),
412 }
413 }
414
415 fn error(kind: LexerErrorKind) -> Self {
416 Self {
417 kind: TokenKind::Error,
418 span: Span::new(),
419 flags: TokenFlags::empty(),
420 payload: TokenPayload::Error(kind),
421 }
422 }
423
424 pub(crate) fn with_span(mut self, span: Span) -> Self {
425 self.span = span;
426 self
427 }
428
429 pub(crate) fn rebased(mut self, base: Position) -> Self {
430 self.span = self.span.rebased(base);
431 self.payload = match self.payload {
432 TokenPayload::Word(word) => TokenPayload::Word(word.rebased(base)),
433 payload => payload,
434 };
435 self
436 }
437
438 pub(crate) fn with_synthetic_flag(mut self) -> Self {
439 self.flags = self.flags.with_synthetic();
440 self
441 }
442
443 pub(crate) fn into_owned<'b>(self) -> LexedToken<'b> {
444 let payload = match self.payload {
445 TokenPayload::None => TokenPayload::None,
446 TokenPayload::Word(word) => TokenPayload::Word(word.into_owned()),
447 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
448 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
449 TokenPayload::Error(kind) => TokenPayload::Error(kind),
450 };
451
452 LexedToken {
453 kind: self.kind,
454 span: self.span,
455 flags: self.flags,
456 payload,
457 }
458 }
459
460 pub(crate) fn into_shared<'b>(self, source: &Arc<str>) -> LexedToken<'b> {
461 let payload = match self.payload {
462 TokenPayload::None => TokenPayload::None,
463 TokenPayload::Word(word) => TokenPayload::Word(word.into_shared(source)),
464 TokenPayload::Fd(fd) => TokenPayload::Fd(fd),
465 TokenPayload::FdPair(src_fd, dst_fd) => TokenPayload::FdPair(src_fd, dst_fd),
466 TokenPayload::Error(kind) => TokenPayload::Error(kind),
467 };
468
469 LexedToken {
470 kind: self.kind,
471 span: self.span,
472 flags: self.flags,
473 payload,
474 }
475 }
476
477 pub(crate) fn word_text(&self) -> Option<&str> {
479 self.kind
480 .is_word_like()
481 .then_some(())
482 .and_then(|_| match &self.payload {
483 TokenPayload::Word(word) => word.text(),
484 _ => None,
485 })
486 }
487
488 pub(crate) fn word_string(&self) -> Option<String> {
490 self.kind
491 .is_word_like()
492 .then_some(())
493 .and_then(|_| match &self.payload {
494 TokenPayload::Word(word) => Some(word.joined_text()),
495 _ => None,
496 })
497 }
498
499 pub(crate) fn word(&self) -> Option<&LexedWord<'a>> {
501 match &self.payload {
502 TokenPayload::Word(word) => Some(word),
503 _ => None,
504 }
505 }
506
507 pub(crate) fn source_slice<'b>(&self, source: &'b str) -> Option<&'b str> {
509 if !self.kind.is_word_like() || self.flags.has_cooked_text() || self.flags.is_synthetic() {
510 return None;
511 }
512
513 (self.span.start.offset <= self.span.end.offset && self.span.end.offset <= source.len())
514 .then(|| &source[self.span.start.offset..self.span.end.offset])
515 }
516
517 pub(crate) fn fd_value(&self) -> Option<i32> {
519 match self.payload {
520 TokenPayload::Fd(fd) => Some(fd),
521 _ => None,
522 }
523 }
524
525 pub(crate) fn fd_pair_value(&self) -> Option<(i32, i32)> {
527 match self.payload {
528 TokenPayload::FdPair(src_fd, dst_fd) => Some((src_fd, dst_fd)),
529 _ => None,
530 }
531 }
532
533 pub(crate) fn error_kind(&self) -> Option<LexerErrorKind> {
535 match self.payload {
536 TokenPayload::Error(kind) => Some(kind),
537 _ => None,
538 }
539 }
540}
541
/// Text and location of a here-document body.
///
/// NOTE(review): the code that produces this value is outside the visible
/// part of the file; the field descriptions below follow the field names.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct HeredocRead {
    /// The here-document content.
    pub content: String,
    /// Span associated with `content`.
    pub content_span: Span,
}
550
/// Substitution depth limit that `Lexer::new` passes on as `max_depth`.
const DEFAULT_MAX_SUBST_DEPTH: usize = 50;
/// NOTE(review): not referenced in the visible part of this file; presumably
/// bounds nested parameter-expansion scanning -- confirm at the use site.
const MAX_PARAMETER_EXPANSION_SCAN_DEPTH: usize = 4;
555
556#[derive(Clone, Debug)]
557struct Cursor<'a> {
558 rest: &'a str,
559}
560
561impl<'a> Cursor<'a> {
562 fn new(source: &'a str) -> Self {
563 Self { rest: source }
564 }
565
566 fn first(&self) -> Option<char> {
567 self.rest.chars().next()
568 }
569
570 fn second(&self) -> Option<char> {
571 let mut chars = self.rest.chars();
572 chars.next()?;
573 chars.next()
574 }
575
576 fn third(&self) -> Option<char> {
577 let mut chars = self.rest.chars();
578 chars.next()?;
579 chars.next()?;
580 chars.next()
581 }
582
583 fn bump(&mut self) -> Option<char> {
584 let ch = self.first()?;
585 self.rest = &self.rest[ch.len_utf8()..];
586 Some(ch)
587 }
588
589 fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) -> &'a str {
590 let start = self.rest;
591 let mut end = 0;
592
593 for ch in start.chars() {
594 if !predicate(ch) {
595 break;
596 }
597 end += ch.len_utf8();
598 }
599
600 self.rest = &start[end..];
601 &start[..end]
602 }
603
604 fn rest(&self) -> &'a str {
605 self.rest
606 }
607
608 fn skip_bytes(&mut self, count: usize) {
609 self.rest = &self.rest[count..];
610 }
611
612 fn find_byte(&self, byte: u8) -> Option<usize> {
613 memchr(byte, self.rest.as_bytes())
614 }
615}
616
/// Translates byte offsets in a source string into `Position` values.
#[derive(Clone, Debug)]
struct PositionMap<'a> {
    /// The text that offsets refer to.
    source: &'a str,
    /// Byte offset at which each line begins: `0`, then the offset just after
    /// every `\n` in `source`.
    line_starts: Arc<[usize]>,
    /// The position most recently returned by `position`, reused as the
    /// starting point when the next requested offset lies further ahead.
    cached: Position,
}
623
/// Counters collected by the lexer when the `benchmarking` feature is enabled.
#[cfg(feature = "benchmarking")]
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct LexerBenchmarkCounters {
    /// Number of `Lexer::current_position` calls made while counters were
    /// enabled.
    pub(crate) current_position_calls: u64,
}
629
630impl<'a> PositionMap<'a> {
631 fn new(source: &'a str) -> Self {
632 let mut line_starts =
633 Vec::with_capacity(source.bytes().filter(|byte| *byte == b'\n').count() + 1);
634 line_starts.push(0);
635 line_starts.extend(
636 source
637 .bytes()
638 .enumerate()
639 .filter_map(|(index, byte)| (byte == b'\n').then_some(index + 1)),
640 );
641
642 Self {
643 source,
644 line_starts: line_starts.into(),
645 cached: Position::new(),
646 }
647 }
648
649 fn position(&mut self, offset: usize) -> Position {
650 if offset == self.cached.offset {
651 return self.cached;
652 }
653
654 let position = if offset > self.cached.offset && offset <= self.source.len() {
655 Self::advance_from(self.cached, &self.source[self.cached.offset..offset])
656 } else {
657 self.position_uncached(offset)
658 };
659 self.cached = position;
660 position
661 }
662
663 fn position_uncached(&self, offset: usize) -> Position {
664 let offset = offset.min(self.source.len());
665 let line_index = self
666 .line_starts
667 .partition_point(|start| *start <= offset)
668 .saturating_sub(1);
669 let line_start = self.line_starts[line_index];
670 let line_text = &self.source[line_start..offset];
671 let column = if line_text.is_ascii() {
672 line_text.len() + 1
673 } else {
674 line_text.chars().count() + 1
675 };
676
677 Position {
678 line: line_index + 1,
679 column,
680 offset,
681 }
682 }
683
684 fn advance_from(mut position: Position, text: &str) -> Position {
685 position.offset += text.len();
686 let newline_count = memchr_iter(b'\n', text.as_bytes()).count();
687 if newline_count == 0 {
688 position.column += if text.is_ascii() {
689 text.len()
690 } else {
691 text.chars().count()
692 };
693 return position;
694 }
695
696 position.line += newline_count;
697 let tail_start = memrchr(b'\n', text.as_bytes())
698 .map(|index| index + 1)
699 .unwrap_or_default();
700 let tail = &text[tail_start..];
701 position.column = if tail.is_ascii() {
702 tail.len() + 1
703 } else {
704 tail.chars().count() + 1
705 };
706 position
707 }
708}
709
/// Tokenizer over a shell source string.
#[derive(Clone)]
pub struct Lexer<'a> {
    /// The complete input text; sliced by byte offsets when recovering word
    /// text.
    // NOTE(review): `input` is read by several methods in this file (for
    // example `ensure_capture_from_source`), so this `allow` may be stale.
    #[allow(dead_code)]
    input: &'a str,
    /// Current byte offset, advanced as characters or bytes are consumed.
    offset: usize,
    /// Cursor over the part of `input` that has not been consumed yet.
    cursor: Cursor<'a>,
    /// Converts `offset` values into line/column positions.
    position_map: PositionMap<'a>,
    /// Characters served by `peek_char`/`advance` before the cursor is
    /// consulted.
    reinject_buf: VecDeque<char>,
    /// Offset restored into `offset` by `sync_offset_to_cursor` once
    /// `reinject_buf` is empty.
    reinject_resume_offset: Option<usize>,
    /// Substitution depth limit supplied at construction.
    max_subst_depth: usize,
    /// Zsh option state taken from the shell profile; used when no timeline
    /// entry applies yet.
    initial_zsh_options: Option<ZshOptionState>,
    /// Optional offset-ordered record of zsh option states.
    zsh_timeline: Option<Arc<ZshOptionTimeline>>,
    /// Number of timeline entries whose offset is at or before `offset`, as
    /// advanced by `current_zsh_options`.
    zsh_timeline_index: usize,
    /// Benchmark counters; `None` until `enable_benchmark_counters` is called.
    #[cfg(feature = "benchmarking")]
    benchmark_counters: Option<LexerBenchmarkCounters>,
}
732
733impl<'a> Lexer<'a> {
734 pub fn new(input: &'a str) -> Self {
736 Self::with_max_subst_depth_and_profile(
737 input,
738 DEFAULT_MAX_SUBST_DEPTH,
739 &ShellProfile::native(super::ShellDialect::Bash),
740 None,
741 )
742 }
743
744 pub(super) fn with_max_subst_depth(input: &'a str, max_depth: usize) -> Self {
747 Self::with_max_subst_depth_and_profile(
748 input,
749 max_depth,
750 &ShellProfile::native(super::ShellDialect::Bash),
751 None,
752 )
753 }
754
755 #[cfg(test)]
757 fn with_profile(input: &'a str, shell_profile: &ShellProfile) -> Self {
758 let zsh_timeline = (shell_profile.dialect == super::ShellDialect::Zsh)
759 .then(|| ZshOptionTimeline::build(input, shell_profile))
760 .flatten()
761 .map(Arc::new);
762 Self::with_max_subst_depth_and_profile(
763 input,
764 DEFAULT_MAX_SUBST_DEPTH,
765 shell_profile,
766 zsh_timeline,
767 )
768 }
769
770 pub(crate) fn with_max_subst_depth_and_profile(
771 input: &'a str,
772 max_depth: usize,
773 shell_profile: &ShellProfile,
774 zsh_timeline: Option<Arc<ZshOptionTimeline>>,
775 ) -> Self {
776 Self {
777 input,
778 offset: 0,
779 cursor: Cursor::new(input),
780 position_map: PositionMap::new(input),
781 reinject_buf: VecDeque::new(),
782 reinject_resume_offset: None,
783 max_subst_depth: max_depth,
784 initial_zsh_options: shell_profile.zsh_options().cloned(),
785 zsh_timeline,
786 zsh_timeline_index: 0,
787 #[cfg(feature = "benchmarking")]
788 benchmark_counters: None,
789 }
790 }
791
    /// Returns the position for `offset` without updating the position cache
    /// (delegates to `PositionMap::position_uncached`).
    pub(super) fn position_at_offset(&self, offset: usize) -> Position {
        self.position_map.position_uncached(offset)
    }
795
796 fn current_position(&mut self) -> Position {
797 #[cfg(feature = "benchmarking")]
798 self.maybe_record_current_position_call();
799 self.position_map.position(self.offset)
800 }
801
802 #[cfg(feature = "benchmarking")]
803 pub(crate) fn enable_benchmark_counters(&mut self) {
804 self.benchmark_counters = Some(LexerBenchmarkCounters::default());
805 }
806
807 #[cfg(feature = "benchmarking")]
808 pub(crate) fn benchmark_counters(&self) -> LexerBenchmarkCounters {
809 self.benchmark_counters.unwrap_or_default()
810 }
811
812 #[cfg(feature = "benchmarking")]
813 fn maybe_record_current_position_call(&mut self) {
814 if let Some(counters) = &mut self.benchmark_counters {
815 counters.current_position_calls += 1;
816 }
817 }
818
819 fn sync_offset_to_cursor(&mut self) {
820 if self.reinject_buf.is_empty()
821 && let Some(offset) = self.reinject_resume_offset.take()
822 {
823 self.offset = offset;
824 }
825 }
826
827 pub fn next_token_kind(&mut self) -> Option<TokenKind> {
830 self.next_lexed_token().map(|token| token.kind)
831 }
832
833 fn peek_char(&mut self) -> Option<char> {
834 self.sync_offset_to_cursor();
835 if let Some(&ch) = self.reinject_buf.front() {
836 Some(ch)
837 } else {
838 self.cursor.first()
839 }
840 }
841
842 fn advance(&mut self) -> Option<char> {
843 self.sync_offset_to_cursor();
844 let ch = if !self.reinject_buf.is_empty() {
845 self.reinject_buf.pop_front()
846 } else {
847 self.cursor.bump()
848 };
849 if let Some(c) = ch {
850 self.offset += c.len_utf8();
851 }
852 ch
853 }
854
855 fn lookahead_chars(&self) -> impl Iterator<Item = char> + '_ {
856 self.reinject_buf
857 .iter()
858 .copied()
859 .chain(self.cursor.rest().chars())
860 }
861
862 fn second_char(&self) -> Option<char> {
863 match self.reinject_buf.len() {
864 0 => self.cursor.second(),
865 1 => self.cursor.first(),
866 _ => self.reinject_buf.get(1).copied(),
867 }
868 }
869
870 fn third_char(&self) -> Option<char> {
871 match self.reinject_buf.len() {
872 0 => self.cursor.third(),
873 1 => self.cursor.second(),
874 2 => self.cursor.first(),
875 _ => self.reinject_buf.get(2).copied(),
876 }
877 }
878
879 fn fourth_char(&self) -> Option<char> {
880 match self.reinject_buf.len() {
881 0 => self.cursor.rest().chars().nth(3),
882 1 => self.cursor.third(),
883 2 => self.cursor.second(),
884 3 => self.cursor.first(),
885 _ => self.reinject_buf.get(3).copied(),
886 }
887 }
888
889 fn consume_source_bytes(&mut self, byte_len: usize) {
890 debug_assert!(self.reinject_buf.is_empty());
891 self.sync_offset_to_cursor();
892 self.offset += byte_len;
893 self.cursor.skip_bytes(byte_len);
894 }
895
    /// Moves `offset` forward by `byte_len` without touching the cursor.
    ///
    /// Must only be called while the reinjection buffer is empty (checked in
    /// debug builds). Presumably used after the cursor has already been
    /// advanced by other means -- confirm at the call sites.
    fn advance_scanned_source_bytes(&mut self, byte_len: usize) {
        debug_assert!(self.reinject_buf.is_empty());
        self.offset += byte_len;
    }
900
901 fn consume_ascii_chars(&mut self, count: usize) {
902 if self.reinject_buf.is_empty() {
903 self.consume_source_bytes(count);
904 return;
905 }
906
907 for _ in 0..count {
908 self.advance();
909 }
910 }
911
912 fn source_horizontal_whitespace_len(&self) -> usize {
913 self.cursor
914 .rest()
915 .as_bytes()
916 .iter()
917 .take_while(|byte| matches!(**byte, b' ' | b'\t'))
918 .count()
919 }
920
921 fn source_ascii_plain_word_len(&self) -> usize {
922 self.cursor
923 .rest()
924 .as_bytes()
925 .iter()
926 .take_while(|byte| Self::is_ascii_plain_word_byte(**byte))
927 .count()
928 }
929
930 fn find_double_quote_special(source: &str) -> Option<usize> {
931 source
932 .as_bytes()
933 .iter()
934 .position(|byte| matches!(*byte, b'"' | b'\\' | b'$' | b'`'))
935 }
936
937 fn ensure_capture_from_source(
938 &self,
939 capture: &mut Option<String>,
940 start: Position,
941 end: Position,
942 ) {
943 if capture.is_none() {
944 *capture = Some(self.input[start.offset..end.offset].to_string());
945 }
946 }
947
948 fn push_capture_char(capture: &mut Option<String>, ch: char) {
949 if let Some(text) = capture.as_mut() {
950 text.push(ch);
951 }
952 }
953
954 fn push_capture_str(capture: &mut Option<String>, text: &str) {
955 if let Some(current) = capture.as_mut() {
956 current.push_str(text);
957 }
958 }
959
960 fn current_zsh_options(&mut self) -> Option<&ZshOptionState> {
961 if let Some(timeline) = self.zsh_timeline.as_ref() {
962 while self.zsh_timeline_index < timeline.entries.len()
963 && timeline.entries[self.zsh_timeline_index].offset <= self.offset
964 {
965 self.zsh_timeline_index += 1;
966 }
967 return if self.zsh_timeline_index == 0 {
968 self.initial_zsh_options.as_ref()
969 } else {
970 Some(&timeline.entries[self.zsh_timeline_index - 1].state)
971 };
972 }
973
974 self.initial_zsh_options.as_ref()
975 }
976
977 fn comments_enabled(&mut self) -> bool {
978 !self
979 .current_zsh_options()
980 .is_some_and(|options| options.interactive_comments.is_definitely_off())
981 }
982
983 fn rc_quotes_enabled(&mut self) -> bool {
984 self.current_zsh_options()
985 .is_some_and(|options| options.rc_quotes.is_definitely_on())
986 }
987
988 fn ignore_braces_enabled(&mut self) -> bool {
989 self.current_zsh_options()
990 .is_some_and(|options| options.ignore_braces.is_definitely_on())
991 }
992
993 fn ignore_close_braces_enabled(&mut self) -> bool {
994 self.current_zsh_options().is_some_and(|options| {
995 options.ignore_braces.is_definitely_on()
996 || options.ignore_close_braces.is_definitely_on()
997 })
998 }
999
1000 fn should_treat_hash_as_word_char(&mut self) -> bool {
1001 if !self.comments_enabled() {
1002 return true;
1003 }
1004 self.reinject_buf.is_empty()
1005 && (self
1006 .input
1007 .get(..self.offset)
1008 .and_then(|prefix| prefix.chars().next_back())
1009 .is_some_and(|prev| {
1010 !prev.is_whitespace() && !matches!(prev, ';' | '|' | '&' | '<' | '>')
1011 })
1012 || self.is_inside_unclosed_double_paren_on_line())
1013 }
1014
1015 fn current_word_text<'b>(&'b self, start: Position, capture: &'b Option<String>) -> &'b str {
1016 capture
1017 .as_deref()
1018 .unwrap_or(&self.input[start.offset..self.offset])
1019 }
1020
1021 fn current_word_surface_is_single_char(
1022 &self,
1023 start: Position,
1024 capture: &Option<String>,
1025 target: char,
1026 ) -> bool {
1027 let text = self.current_word_text(start, capture);
1028 if !text.contains('\x00') {
1029 let mut encoded = [0; 4];
1030 return text == target.encode_utf8(&mut encoded);
1031 }
1032
1033 let mut chars = text.chars().filter(|&ch| ch != '\x00');
1034 matches!((chars.next(), chars.next()), (Some(ch), None) if ch == target)
1035 }
1036
1037 fn current_word_surface_last_char<'b>(
1038 &'b self,
1039 start: Position,
1040 capture: &'b Option<String>,
1041 ) -> Option<char> {
1042 self.current_word_text(start, capture)
1043 .chars()
1044 .rev()
1045 .find(|&ch| ch != '\x00')
1046 }
1047
1048 fn current_word_surface_ends_with_char(
1049 &self,
1050 start: Position,
1051 capture: &Option<String>,
1052 target: char,
1053 ) -> bool {
1054 self.current_word_surface_last_char(start, capture) == Some(target)
1055 }
1056
1057 fn current_word_surface_ends_with_extglob_prefix(
1058 &self,
1059 start: Position,
1060 capture: &Option<String>,
1061 ) -> bool {
1062 self.current_word_surface_last_char(start, capture)
1063 .is_some_and(|ch| matches!(ch, '@' | '?' | '*' | '+' | '!'))
1064 }
1065
1066 pub fn next_lexed_token(&mut self) -> Option<LexedToken<'a>> {
1068 self.skip_whitespace();
1069 let start = self.current_position();
1070 let token = self.next_lexed_token_inner(false)?;
1071 let end = self.current_position();
1072 Some(token.with_span(Span::from_positions(start, end)))
1073 }
1074
1075 pub(super) fn next_lexed_token_with_comments(&mut self) -> Option<LexedToken<'a>> {
1077 self.skip_whitespace();
1078 let start = self.current_position();
1079 let token = self.next_lexed_token_inner(true)?;
1080 let end = self.current_position();
1081 Some(token.with_span(Span::from_positions(start, end)))
1082 }
1083
    /// Lexes one token starting at the current character.
    ///
    /// Dispatches on the next character: operators and punctuation are
    /// recognised by looking up to two characters further ahead and consuming
    /// the longest match; quotes, digits and everything else are handed to
    /// the dedicated word readers. The returned token has a default span; the
    /// callers attach the real one.
    ///
    /// `preserve_comments` selects whether a comment yields a comment token
    /// (`true`) or is skipped before lexing continues (`false`).
    ///
    /// Returns `None` when there is no next character.
    fn next_lexed_token_inner(&mut self, preserve_comments: bool) -> Option<LexedToken<'a>> {
        let ch = self.peek_char()?;

        match ch {
            '\n' => {
                self.consume_ascii_chars(1);
                Some(LexedToken::punctuation(TokenKind::Newline))
            }
            // `;;&`, `;;`, `;|`, `;&`, `;`
            ';' => {
                if self.second_char() == Some(';') {
                    if self.third_char() == Some('&') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemiAmp))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::DoubleSemicolon))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiPipe))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::SemiAmp))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Semicolon))
                }
            }
            // `||`, `|&`, `|`
            '|' => {
                if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Or))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::PipeBoth))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Pipe))
                }
            }
            // `&&`, `&>>`, `&>`, `&|`, `&!`, `&`
            '&' => {
                if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::And))
                } else if self.second_char() == Some('>') {
                    if self.third_char() == Some('>') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::RedirectBothAppend))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::RedirectBoth))
                    }
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundPipe))
                } else if self.second_char() == Some('!') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::BackgroundBang))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::Background))
                }
            }
            // `>>|`, `>>`, `>|`, `>(`, `>&`, `>`
            '>' => {
                if self.second_char() == Some('>') {
                    // `>>|` is consumed whole but reported with the same kind
                    // as `>>`.
                    if self.third_char() == Some('|') {
                        self.consume_ascii_chars(3);
                    } else {
                        self.consume_ascii_chars(2);
                    }
                    Some(LexedToken::punctuation(TokenKind::RedirectAppend))
                } else if self.second_char() == Some('|') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::Clobber))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubOut))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupOutput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectOut))
                }
            }
            // `<<<`, `<<-`, `<<`, `<>`, `<(`, `<&`, `<`
            '<' => {
                if self.second_char() == Some('<') {
                    if self.third_char() == Some('<') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereString))
                    } else if self.third_char() == Some('-') {
                        self.consume_ascii_chars(3);
                        Some(LexedToken::punctuation(TokenKind::HereDocStrip))
                    } else {
                        self.consume_ascii_chars(2);
                        Some(LexedToken::punctuation(TokenKind::HereDoc))
                    }
                } else if self.second_char() == Some('>') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::RedirectReadWrite))
                } else if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::ProcessSubIn))
                } else if self.second_char() == Some('&') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DupInput))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RedirectIn))
                }
            }
            '(' => {
                if self.second_char() == Some('(') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::LeftParen))
                }
            }
            ')' => {
                if self.second_char() == Some(')') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightParen))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::RightParen))
                }
            }
            '{' => {
                // Remember where the brace starts: the word readers below
                // receive it as the start of the word.
                let start = self.current_position();
                if self.ignore_braces_enabled() {
                    // With zsh `ignore_braces` on, `{` is ordinary word text:
                    // a lone `{` word, or the first character of a longer one.
                    self.consume_ascii_chars(1);
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "{", None))
                        }
                        _ => self.read_word_starting_with("{", start),
                    }
                } else if self.looks_like_brace_expansion() {
                    self.read_brace_expansion_word()
                } else if self.is_brace_group_start() {
                    self.advance();
                    Some(LexedToken::punctuation(TokenKind::LeftBrace))
                } else if self.brace_literal_starts_case_pattern_delimiter() {
                    self.read_word_starting_with("{", start)
                } else {
                    self.read_brace_literal_word()
                }
            }
            '}' => {
                self.consume_ascii_chars(1);
                // With the zsh brace-ignoring options on, `}` is a plain word.
                if self.ignore_close_braces_enabled() {
                    Some(LexedToken::borrowed_word(TokenKind::Word, "}", None))
                } else {
                    Some(LexedToken::punctuation(TokenKind::RightBrace))
                }
            }
            '[' => {
                let start = self.current_position();
                self.consume_ascii_chars(1);
                // `[[` counts as an operator only when followed by a space,
                // tab, newline or end of input.
                if self.peek_char() == Some('[')
                    && matches!(
                        self.second_char(),
                        Some(' ') | Some('\t') | Some('\n') | None
                    )
                {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::punctuation(TokenKind::DoubleLeftBracket))
                } else {
                    // A lone `[` is a word on its own; otherwise it begins a
                    // longer word.
                    match self.peek_char() {
                        Some(' ') | Some('\t') | Some('\n') | None => {
                            Some(LexedToken::borrowed_word(TokenKind::Word, "[", None))
                        }
                        _ => self.read_word_starting_with("[", start),
                    }
                }
            }
            ']' => {
                if self.second_char() == Some(']') {
                    self.consume_ascii_chars(2);
                    Some(LexedToken::punctuation(TokenKind::DoubleRightBracket))
                } else {
                    self.consume_ascii_chars(1);
                    Some(LexedToken::borrowed_word(TokenKind::Word, "]", None))
                }
            }
            '\'' => self.read_single_quoted_string(),
            '"' => self.read_double_quoted_string(),
            '#' => {
                // `#` may be part of a word rather than a comment opener.
                if self.should_treat_hash_as_word_char() {
                    let start = self.current_position();
                    return self.read_word_starting_with("#", start);
                }
                if preserve_comments {
                    self.read_comment();
                    Some(LexedToken::comment())
                } else {
                    // Drop the comment and lex whatever follows it.
                    self.skip_comment();
                    self.next_lexed_token_inner(false)
                }
            }
            // Leading digits may turn out to be a word or a descriptor
            // redirect; the reader decides.
            '0'..='9' => self.read_word_or_fd_redirect(),
            _ => self.read_word(),
        }
    }
1301
1302 fn skip_whitespace(&mut self) {
1303 while let Some(ch) = self.peek_char() {
1304 if self.reinject_buf.is_empty() {
1305 let whitespace_len = self.source_horizontal_whitespace_len();
1306 if whitespace_len > 0 {
1307 self.consume_source_bytes(whitespace_len);
1308 continue;
1309 }
1310
1311 if self.cursor.rest().starts_with("\\\n") {
1312 self.consume_source_bytes(2);
1313 continue;
1314 }
1315 }
1316
1317 if ch == ' ' || ch == '\t' {
1318 self.consume_ascii_chars(1);
1319 } else if ch == '\\' {
1320 if self.second_char() == Some('\n') {
1322 self.consume_ascii_chars(2);
1323 } else {
1324 break;
1325 }
1326 } else {
1327 break;
1328 }
1329 }
1330 }
1331
1332 fn skip_comment(&mut self) {
1333 if self.reinject_buf.is_empty() {
1334 let end = self
1335 .cursor
1336 .find_byte(b'\n')
1337 .unwrap_or(self.cursor.rest().len());
1338 self.consume_source_bytes(end);
1339 return;
1340 }
1341
1342 while let Some(ch) = self.peek_char() {
1343 if ch == '\n' {
1344 break;
1345 }
1346 self.advance();
1347 }
1348 }
1349
1350 fn read_comment(&mut self) {
1351 debug_assert_eq!(self.peek_char(), Some('#'));
1352
1353 if self.reinject_buf.is_empty() {
1354 let rest = self.cursor.rest();
1355 let end = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
1356 self.consume_source_bytes(end);
1357 return;
1358 }
1359
1360 self.advance(); while let Some(ch) = self.peek_char() {
1363 if ch == '\n' {
1364 break;
1365 }
1366 self.advance();
1367 }
1368 }
1369
1370 fn is_inside_unclosed_double_paren_on_line(&self) -> bool {
1371 if !self.reinject_buf.is_empty() || self.offset > self.input.len() {
1372 return false;
1373 }
1374
1375 let line_start = self.input[..self.offset]
1376 .rfind('\n')
1377 .map_or(0, |index| index + 1);
1378 let prefix = &self.input[line_start..self.offset];
1379 line_has_unclosed_double_paren(prefix)
1380 }
1381
1382 fn read_word_or_fd_redirect(&mut self) -> Option<LexedToken<'a>> {
1385 if let Some(first_digit) = self.peek_char().filter(|ch| ch.is_ascii_digit()) {
1386 let Some(fd) = first_digit.to_digit(10) else {
1387 unreachable!("peeked ASCII digit should convert to a base-10 digit");
1388 };
1389 let fd = fd as i32;
1390
1391 match (self.second_char(), self.third_char()) {
1392 (Some('>'), Some('>')) => {
1393 if self.fourth_char() == Some('|') {
1394 self.consume_ascii_chars(4);
1395 } else {
1396 self.consume_ascii_chars(3);
1397 }
1398 return Some(LexedToken::fd(TokenKind::RedirectFdAppend, fd));
1399 }
1400 (Some('>'), Some('|')) => {
1401 self.consume_ascii_chars(3);
1402 return Some(LexedToken::fd(TokenKind::Clobber, fd));
1403 }
1404 (Some('>'), Some('&')) => {
1405 self.consume_ascii_chars(3);
1406
1407 let mut target_str = String::with_capacity(4);
1408 while let Some(c) = self.peek_char() {
1409 if c.is_ascii_digit() {
1410 target_str.push(c);
1411 self.advance();
1412 } else {
1413 break;
1414 }
1415 }
1416
1417 if target_str.is_empty() {
1418 return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1419 }
1420
1421 let target_fd: i32 = target_str.parse().unwrap_or(1);
1422 return Some(LexedToken::fd_pair(TokenKind::DupFd, fd, target_fd));
1423 }
1424 (Some('>'), _) => {
1425 self.consume_ascii_chars(2);
1426 return Some(LexedToken::fd(TokenKind::RedirectFd, fd));
1427 }
1428 (Some('<'), Some('&')) => {
1429 self.consume_ascii_chars(3);
1430
1431 let mut target_str = String::with_capacity(4);
1432 while let Some(c) = self.peek_char() {
1433 if c.is_ascii_digit() || c == '-' {
1434 target_str.push(c);
1435 self.advance();
1436 if c == '-' {
1437 break;
1438 }
1439 } else {
1440 break;
1441 }
1442 }
1443
1444 if target_str == "-" {
1445 return Some(LexedToken::fd(TokenKind::DupFdClose, fd));
1446 }
1447 let target_fd: i32 = target_str.parse().unwrap_or(0);
1448 return Some(LexedToken::fd_pair(TokenKind::DupFdIn, fd, target_fd));
1449 }
1450 (Some('<'), Some('>')) => {
1451 self.consume_ascii_chars(3);
1452 return Some(LexedToken::fd(TokenKind::RedirectFdReadWrite, fd));
1453 }
1454 (Some('<'), Some('<')) => {}
1455 (Some('<'), _) => {
1456 self.consume_ascii_chars(2);
1457 return Some(LexedToken::fd(TokenKind::RedirectFdIn, fd));
1458 }
1459 _ => {}
1460 }
1461 }
1462
1463 self.read_word()
1465 }
1466
1467 fn read_word_starting_with(
1468 &mut self,
1469 _prefix: &str,
1470 start: Position,
1471 ) -> Option<LexedToken<'a>> {
1472 let segment = match self.read_unquoted_segment(start) {
1473 Ok(segment) => segment,
1474 Err(kind) => return Some(LexedToken::error(kind)),
1475 };
1476 if segment.as_str().is_empty() {
1477 return None;
1478 }
1479 let mut lexed_word = LexedWord::from_segment(segment);
1480 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1481 return Some(LexedToken::error(kind));
1482 }
1483 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1484 }
1485
1486 fn read_word(&mut self) -> Option<LexedToken<'a>> {
1487 let start = self.current_position();
1488
1489 if self.reinject_buf.is_empty() {
1490 let ascii_len = self.source_ascii_plain_word_len();
1491 let chunk = if ascii_len > 0
1492 && self
1493 .cursor
1494 .rest()
1495 .as_bytes()
1496 .get(ascii_len)
1497 .is_none_or(|byte| byte.is_ascii())
1498 {
1499 self.consume_source_bytes(ascii_len);
1500 &self.input[start.offset..self.offset]
1501 } else {
1502 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
1503 self.advance_scanned_source_bytes(chunk.len());
1504 chunk
1505 };
1506 if !chunk.is_empty() {
1507 let continues = matches!(
1508 self.peek_char(),
1509 Some(next)
1510 if Self::is_word_char(next)
1511 || next == '$'
1512 || matches!(next, '\'' | '"')
1513 || next == '{'
1514 || (next == '\\' && self.second_char() == Some('\n'))
1515 || (next == '('
1516 && (chunk.ends_with('=')
1517 || Self::word_can_take_parenthesized_suffix(chunk)))
1518 );
1519
1520 if !continues {
1521 let end = self.current_position();
1522 return Some(LexedToken::borrowed_word(
1523 TokenKind::Word,
1524 &self.input[start.offset..self.offset],
1525 Some(Span::from_positions(start, end)),
1526 ));
1527 }
1528
1529 if self.peek_char() == Some('(')
1530 && (chunk.ends_with('=') || Self::word_can_take_parenthesized_suffix(chunk))
1531 {
1532 return self.read_complex_word(start);
1533 }
1534
1535 let end = self.current_position();
1536 return self.finish_segmented_word(LexedWord::borrowed(
1537 LexedWordSegmentKind::Plain,
1538 &self.input[start.offset..self.offset],
1539 Some(Span::from_positions(start, end)),
1540 ));
1541 }
1542 }
1543
1544 self.read_complex_word(start)
1545 }
1546
1547 fn finish_segmented_word(&mut self, mut lexed_word: LexedWord<'a>) -> Option<LexedToken<'a>> {
1548 if let Err(kind) = self.append_segmented_continuation(&mut lexed_word) {
1549 return Some(LexedToken::error(kind));
1550 }
1551
1552 Some(LexedToken::with_word_payload(TokenKind::Word, lexed_word))
1553 }
1554
1555 fn read_complex_word(&mut self, start: Position) -> Option<LexedToken<'a>> {
1556 if self.peek_char() == Some('$') {
1557 match self.second_char() {
1558 Some('\'') => return self.read_dollar_single_quoted_string(),
1559 Some('"') => return self.read_dollar_double_quoted_string(),
1560 _ => {}
1561 }
1562 }
1563
1564 let segment = match self.read_unquoted_segment(start) {
1565 Ok(segment) => segment,
1566 Err(kind) => return Some(LexedToken::error(kind)),
1567 };
1568
1569 if segment.as_str().is_empty() {
1570 return None;
1571 }
1572
1573 self.finish_segmented_word(LexedWord::from_segment(segment))
1574 }
1575
    /// Reads one unquoted ("plain") word segment whose text begins at `start`.
    ///
    /// The segment ends at a quote character, at a `$'`/`$"` that is not the
    /// very first thing in the segment, or at any character that none of the
    /// branches below accept. Expansions (`$name`, `${...}`, `$(...)`,
    /// `$((...))`, `$[...]`, backticks), backslash escapes, brace segments,
    /// `=(`...`)` assignments and extglob-style `(`...`)` groups are kept
    /// inside the segment.
    ///
    /// # Errors
    ///
    /// * `LexerErrorKind::CommandSubstitution` when a `$[`, `$((` or `$(`
    ///   construct is reported as unterminated by its reader.
    /// * `LexerErrorKind::BacktickSubstitution` when a backtick is never closed.
    ///
    /// # Returns
    ///
    /// An owned segment when a capture buffer was needed, otherwise a segment
    /// borrowed from `self.input[start.offset..self.offset]`.
    fn read_unquoted_segment(
        &mut self,
        start: Position,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        // A capture buffer exists up front only while reinjected text is
        // pending. While it is `None` the segment text is taken from the
        // source slice at the end; the `push_capture_*` helpers are presumably
        // no-ops in that state — TODO confirm against their definitions.
        let mut word = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        while let Some(ch) = self.peek_char() {
            if ch == '"' || ch == '\'' {
                // Quoted text is a separate segment.
                break;
            } else if ch == '$' {
                // `$'...'` and `$"..."` end this segment, but only once
                // something has already been read into it.
                if matches!(self.second_char(), Some('\'') | Some('"'))
                    && (self.current_position().offset > start.offset
                        || word.as_ref().is_some_and(|word| !word.is_empty()))
                {
                    break;
                }

                self.advance();

                Self::push_capture_char(&mut word, ch);
                if self.peek_char() == Some('[') {
                    // `$[ ... ]`
                    Self::push_capture_char(&mut word, '[');
                    self.advance();
                    if !self.read_legacy_arithmetic_into(&mut word, start) {
                        return Err(LexerErrorKind::CommandSubstitution);
                    }
                } else if self.peek_char() == Some('(') {
                    if self.second_char() == Some('(') {
                        // `$(( ... ))`; the reader consumes both parentheses itself.
                        if !self.read_arithmetic_expansion_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    } else {
                        // `$( ... )`
                        Self::push_capture_char(&mut word, '(');
                        self.advance();
                        if !self.read_command_subst_into(&mut word) {
                            return Err(LexerErrorKind::CommandSubstitution);
                        }
                    }
                } else if self.peek_char() == Some('{') {
                    // `${ ... }`; the reader's boolean result is deliberately ignored here.
                    Self::push_capture_char(&mut word, '{');
                    self.advance();
                    let _ = self.read_param_expansion_into(&mut word, start);
                } else {
                    if let Some(c) = self.peek_char() {
                        // One-character parameters (`$?`, `$#`, `$@`, `$*`, `$!`,
                        // `$$`, `$-`, `$0`..`$9`) take exactly one character.
                        if matches!(c, '?' | '#' | '@' | '*' | '!' | '$' | '-')
                            || c.is_ascii_digit()
                        {
                            Self::push_capture_char(&mut word, c);
                            self.advance();
                        } else {
                            // Otherwise take the longest run of ASCII
                            // alphanumerics and underscores as the name.
                            while let Some(c) = self.peek_char() {
                                if c.is_ascii_alphanumeric() || c == '_' {
                                    Self::push_capture_char(&mut word, c);
                                    self.advance();
                                } else {
                                    break;
                                }
                            }
                        }
                    }
                }
            } else if ch == '{' {
                if self.looks_like_mid_word_brace_segment() {
                    // Consume the whole brace segment as part of the word.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                    self.consume_mid_word_brace_segment(&mut word);
                } else {
                    // A lone `{` is kept as an ordinary character.
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else if ch == '`' {
                // Backtick substitution: make sure a capture buffer exists
                // (seeded from the source read so far), then copy up to the
                // closing backtick.
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut closed = false;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    if c == '`' {
                        closed = true;
                        break;
                    }
                    // A backslash keeps the following character from being
                    // treated as the closing backtick.
                    if c == '\\'
                        && let Some(next) = self.peek_char()
                    {
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                    }
                }
                if !closed {
                    return Err(LexerErrorKind::BacktickSubstitution);
                }
            } else if ch == '\\' {
                // Escapes change the text, so an owned capture is required.
                let capture_end = self.current_position();
                self.ensure_capture_from_source(&mut word, start, capture_end);
                self.advance();
                if let Some(next) = self.peek_char() {
                    if next == '\n' {
                        // Line continuation: drop both characters.
                        self.advance();
                    } else {
                        // The escaped character is stored behind a NUL —
                        // presumably a marker later stages use to recognise
                        // escaped characters; TODO confirm.
                        Self::push_capture_char(&mut word, '\x00');
                        Self::push_capture_char(&mut word, next);
                        self.advance();
                        // A word that so far is only an escaped `{` and looks
                        // like a brace expansion is copied through its
                        // balancing `}`.
                        if next == '{'
                            && self.current_word_surface_is_single_char(start, &word, '{')
                            && self.escaped_brace_sequence_looks_like_brace_expansion()
                        {
                            let mut depth = 1;
                            while let Some(c) = self.peek_char() {
                                Self::push_capture_char(&mut word, c);
                                self.advance();
                                match c {
                                    '{' => depth += 1,
                                    '}' => {
                                        depth -= 1;
                                        if depth == 0 {
                                            break;
                                        }
                                    }
                                    _ => {}
                                }
                            }
                        }
                    }
                } else {
                    // Trailing backslash at end of input is kept literally.
                    Self::push_capture_char(&mut word, '\\');
                }
            } else if ch == '('
                && self.current_word_surface_ends_with_char(start, &word, '=')
                && self.looks_like_assoc_assign()
            {
                // `name=( ... )`: copy through the balancing `)`, stepping
                // over quoted strings and escapes so their parentheses do not
                // affect the depth.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '"' => {
                            // Double-quoted string: backslash escapes the next character.
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '"' {
                                    break;
                                }
                                if qc == '\\'
                                    && let Some(esc) = self.peek_char()
                                {
                                    Self::push_capture_char(&mut word, esc);
                                    self.advance();
                                }
                            }
                        }
                        '\'' => {
                            // Single-quoted string: no escapes inside.
                            while let Some(qc) = self.peek_char() {
                                Self::push_capture_char(&mut word, qc);
                                self.advance();
                                if qc == '\'' {
                                    break;
                                }
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if ch == '(' && self.current_word_surface_ends_with_extglob_prefix(start, &word)
            {
                // Parenthesised group after an extglob prefix: copy through
                // the balancing `)`, honouring backslash escapes only.
                Self::push_capture_char(&mut word, ch);
                self.advance();
                let mut depth = 1;
                while let Some(c) = self.peek_char() {
                    Self::push_capture_char(&mut word, c);
                    self.advance();
                    match c {
                        '(' => depth += 1,
                        ')' => {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        '\\' => {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(&mut word, esc);
                                self.advance();
                            }
                        }
                        _ => {}
                    }
                }
            } else if Self::is_plain_word_char(ch) {
                if self.reinject_buf.is_empty() {
                    // Bulk-consume a run of plain characters. The ASCII
                    // shortcut is used only when the byte after the run is
                    // ASCII as well (or the input ends there).
                    let ascii_len = self.source_ascii_plain_word_len();
                    let chunk = if ascii_len > 0
                        && self
                            .cursor
                            .rest()
                            .as_bytes()
                            .get(ascii_len)
                            .is_none_or(|byte| byte.is_ascii())
                    {
                        self.consume_source_bytes(ascii_len);
                        &self.input[self.offset - ascii_len..self.offset]
                    } else {
                        let chunk = self.cursor.eat_while(Self::is_plain_word_char);
                        self.advance_scanned_source_bytes(chunk.len());
                        chunk
                    };
                    Self::push_capture_str(&mut word, chunk);
                } else {
                    Self::push_capture_char(&mut word, ch);
                    self.advance();
                }
            } else {
                // Anything else ends the segment.
                break;
            }
        }

        if let Some(word) = word {
            // Owned text: the same span is passed for both span arguments.
            let span = Some(Span::from_positions(start, self.current_position()));
            Ok(LexedWordSegment::owned_with_spans(
                LexedWordSegmentKind::Plain,
                word,
                span,
                span,
            ))
        } else {
            // No capture was needed: borrow the text straight from the source.
            let end = self.current_position();
            Ok(LexedWordSegment::borrowed(
                LexedWordSegmentKind::Plain,
                &self.input[start.offset..self.offset],
                Some(Span::from_positions(start, end)),
            ))
        }
    }
1846
1847 fn read_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1848 let segment = match self.read_single_quoted_segment() {
1849 Ok(segment) => segment,
1850 Err(kind) => return Some(LexedToken::error(kind)),
1851 };
1852 let mut word = LexedWord::from_segment(segment);
1853 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1854 return Some(LexedToken::error(kind));
1855 }
1856
1857 Some(LexedToken::with_word_payload(TokenKind::LiteralWord, word))
1858 }
1859
1860 fn read_single_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1861 debug_assert_eq!(self.peek_char(), Some('\''));
1862
1863 let wrapper_start = self.current_position();
1864 self.consume_ascii_chars(1); let content_start = self.current_position();
1866 let can_borrow = self.reinject_buf.is_empty() && !self.rc_quotes_enabled();
1867 let mut content_end = content_start;
1868 let mut content = String::with_capacity(16);
1869 let mut closed = false;
1870
1871 if can_borrow {
1872 let rest = self.cursor.rest();
1873 if let Some(quote_index) = memchr(b'\'', rest.as_bytes()) {
1874 self.consume_source_bytes(quote_index);
1875 content_end = self.current_position();
1876 self.consume_ascii_chars(1); closed = true;
1878 } else {
1879 self.consume_source_bytes(rest.len());
1880 }
1881 }
1882
1883 while let Some(ch) = self.peek_char() {
1884 if closed {
1885 break;
1886 }
1887 if ch == '\'' {
1888 if self.rc_quotes_enabled() && self.second_char() == Some('\'') {
1889 if !can_borrow {
1890 content.push('\'');
1891 }
1892 self.advance();
1893 self.advance();
1894 continue;
1895 }
1896 content_end = self.current_position();
1897 self.consume_ascii_chars(1); closed = true;
1899 break;
1900 }
1901 if !can_borrow {
1902 content.push(ch);
1903 }
1904 self.advance();
1905 }
1906
1907 if !closed {
1908 return Err(LexerErrorKind::SingleQuote);
1909 }
1910
1911 let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
1912 let content_span = Some(Span::from_positions(content_start, content_end));
1913
1914 if can_borrow {
1915 Ok(LexedWordSegment::borrowed_with_spans(
1916 LexedWordSegmentKind::SingleQuoted,
1917 &self.input[content_start.offset..content_end.offset],
1918 content_span,
1919 wrapper_span,
1920 ))
1921 } else {
1922 Ok(LexedWordSegment::owned_with_spans(
1923 LexedWordSegmentKind::SingleQuoted,
1924 content,
1925 content_span,
1926 wrapper_span,
1927 ))
1928 }
1929 }
1930
1931 fn read_dollar_single_quoted_string(&mut self) -> Option<LexedToken<'a>> {
1932 let segment = match self.read_dollar_single_quoted_segment() {
1933 Ok(segment) => segment,
1934 Err(kind) => return Some(LexedToken::error(kind)),
1935 };
1936 let mut word = LexedWord::from_segment(segment);
1937 if let Err(kind) = self.append_segmented_continuation(&mut word) {
1938 return Some(LexedToken::error(kind));
1939 }
1940
1941 let kind = if word.single_segment().is_some() {
1942 TokenKind::LiteralWord
1943 } else {
1944 TokenKind::Word
1945 };
1946
1947 Some(LexedToken::with_word_payload(kind, word))
1948 }
1949
1950 fn read_dollar_single_quoted_segment(
1951 &mut self,
1952 ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
1953 debug_assert_eq!(self.peek_char(), Some('$'));
1954 debug_assert_eq!(self.second_char(), Some('\''));
1955
1956 let wrapper_start = self.current_position();
1957 self.consume_ascii_chars(2); let content_start = self.current_position();
1959 let mut out = String::with_capacity(16);
1960
1961 while let Some(ch) = self.peek_char() {
1962 if ch == '\'' {
1963 let content_end = self.current_position();
1964 self.advance();
1965 let wrapper_span =
1966 Some(Span::from_positions(wrapper_start, self.current_position()));
1967 let content_span = Some(Span::from_positions(content_start, content_end));
1968 return Ok(LexedWordSegment::owned_with_spans(
1969 LexedWordSegmentKind::DollarSingleQuoted,
1970 out,
1971 content_span,
1972 wrapper_span,
1973 ));
1974 }
1975
1976 if ch == '\\' {
1977 self.advance();
1978 if let Some(esc) = self.peek_char() {
1979 self.advance();
1980 match esc {
1981 'n' => out.push('\n'),
1982 't' => out.push('\t'),
1983 'r' => out.push('\r'),
1984 'a' => out.push('\x07'),
1985 'b' => out.push('\x08'),
1986 'f' => out.push('\x0C'),
1987 'v' => out.push('\x0B'),
1988 'e' | 'E' => out.push('\x1B'),
1989 '\\' => out.push('\\'),
1990 '\'' => out.push('\''),
1991 '"' => out.push('"'),
1992 '?' => out.push('?'),
1993 'c' => {
1994 if let Some(control) = self.peek_char() {
1995 self.advance();
1996 out.push(((control as u32 & 0x1F) as u8) as char);
1997 } else {
1998 out.push('\\');
1999 out.push('c');
2000 }
2001 }
2002 'x' => {
2003 let mut hex = String::new();
2004 for _ in 0..2 {
2005 if let Some(h) = self.peek_char() {
2006 if h.is_ascii_hexdigit() {
2007 hex.push(h);
2008 self.advance();
2009 } else {
2010 break;
2011 }
2012 }
2013 }
2014 if let Ok(val) = u8::from_str_radix(&hex, 16) {
2015 out.push(val as char);
2016 }
2017 }
2018 'u' => {
2019 let mut hex = String::new();
2020 for _ in 0..4 {
2021 if let Some(h) = self.peek_char() {
2022 if h.is_ascii_hexdigit() {
2023 hex.push(h);
2024 self.advance();
2025 } else {
2026 break;
2027 }
2028 }
2029 }
2030 if let Ok(val) = u32::from_str_radix(&hex, 16)
2031 && let Some(c) = char::from_u32(val)
2032 {
2033 out.push(c);
2034 }
2035 }
2036 'U' => {
2037 let mut hex = String::new();
2038 for _ in 0..8 {
2039 if let Some(h) = self.peek_char() {
2040 if h.is_ascii_hexdigit() {
2041 hex.push(h);
2042 self.advance();
2043 } else {
2044 break;
2045 }
2046 }
2047 }
2048 if let Ok(val) = u32::from_str_radix(&hex, 16)
2049 && let Some(c) = char::from_u32(val)
2050 {
2051 out.push(c);
2052 }
2053 }
2054 '0'..='7' => {
2055 let mut oct = String::new();
2056 oct.push(esc);
2057 for _ in 0..2 {
2058 if let Some(o) = self.peek_char() {
2059 if o.is_ascii_digit() && o < '8' {
2060 oct.push(o);
2061 self.advance();
2062 } else {
2063 break;
2064 }
2065 }
2066 }
2067 if let Ok(val) = u8::from_str_radix(&oct, 8) {
2068 out.push(val as char);
2069 }
2070 }
2071 _ => {
2072 out.push('\\');
2073 out.push(esc);
2074 }
2075 }
2076 } else {
2077 out.push('\\');
2078 }
2079 continue;
2080 }
2081
2082 out.push(ch);
2083 self.advance();
2084 }
2085
2086 Err(LexerErrorKind::SingleQuote)
2087 }
2088
2089 fn read_plain_continuation_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2090 let start = self.current_position();
2091
2092 if self.reinject_buf.is_empty() {
2093 let ascii_len = self.source_ascii_plain_word_len();
2094 let chunk = if ascii_len > 0
2095 && self
2096 .cursor
2097 .rest()
2098 .as_bytes()
2099 .get(ascii_len)
2100 .is_none_or(|byte| byte.is_ascii())
2101 {
2102 self.consume_source_bytes(ascii_len);
2103 &self.input[start.offset..self.offset]
2104 } else {
2105 let chunk = self.cursor.eat_while(Self::is_plain_word_char);
2106 self.advance_scanned_source_bytes(chunk.len());
2107 chunk
2108 };
2109 if chunk.is_empty() {
2110 return None;
2111 }
2112
2113 let end = self.current_position();
2114 return Some(LexedWordSegment::borrowed(
2115 LexedWordSegmentKind::Plain,
2116 &self.input[start.offset..self.offset],
2117 Some(Span::from_positions(start, end)),
2118 ));
2119 }
2120
2121 let ch = self.peek_char()?;
2122 if !Self::is_plain_word_char(ch) {
2123 return None;
2124 }
2125
2126 let mut text = String::with_capacity(16);
2127 while let Some(ch) = self.peek_char() {
2128 if !Self::is_plain_word_char(ch) {
2129 break;
2130 }
2131 text.push(ch);
2132 self.advance();
2133 }
2134
2135 Some(LexedWordSegment::owned(LexedWordSegmentKind::Plain, text))
2136 }
2137
2138 fn append_segmented_continuation(
2141 &mut self,
2142 word: &mut LexedWord<'a>,
2143 ) -> Result<(), LexerErrorKind> {
2144 loop {
2145 match self.peek_char() {
2146 Some('\\') if self.second_char() == Some('\n') => {
2147 self.advance();
2148 self.advance();
2149 continue;
2150 }
2151 Some('\'') => {
2152 word.push_segment(self.read_single_quoted_segment()?);
2153 }
2154 Some('"') => {
2155 word.push_segment(self.read_double_quoted_segment()?);
2156 }
2157 Some('$') if self.second_char() == Some('\'') => {
2158 word.push_segment(self.read_dollar_single_quoted_segment()?);
2159 }
2160 Some('$') if self.second_char() == Some('"') => {
2161 word.push_segment(self.read_dollar_double_quoted_segment()?);
2162 }
2163 Some('(') if Self::lexed_word_can_take_parenthesized_suffix(word) => {
2164 let Some(segment) = self.read_parenthesized_word_suffix_segment() else {
2165 unreachable!("peeked '(' should produce a suffix segment");
2166 };
2167 word.push_segment(segment);
2168 }
2169 _ => {
2170 if let Some(segment) = self.read_plain_continuation_segment() {
2171 word.push_segment(segment);
2172 continue;
2173 }
2174
2175 let start = self.current_position();
2176 let plain = self.read_unquoted_segment(start)?;
2177 if plain.as_str().is_empty() {
2178 break;
2179 }
2180 word.push_segment(plain);
2181 }
2182 }
2183 }
2184
2185 Ok(())
2186 }
2187
2188 fn read_parenthesized_word_suffix_segment(&mut self) -> Option<LexedWordSegment<'a>> {
2189 debug_assert_eq!(self.peek_char(), Some('('));
2190
2191 let start = self.current_position();
2192 let mut depth = 0usize;
2193 let mut escaped = false;
2194 let mut text = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
2195
2196 while let Some(ch) = self.peek_char() {
2197 if let Some(text) = text.as_mut() {
2198 text.push(ch);
2199 }
2200 self.advance();
2201
2202 if escaped {
2203 escaped = false;
2204 continue;
2205 }
2206
2207 match ch {
2208 '\\' => escaped = true,
2209 '(' => depth += 1,
2210 ')' => {
2211 depth = depth.saturating_sub(1);
2212 if depth == 0 {
2213 break;
2214 }
2215 }
2216 _ => {}
2217 }
2218 }
2219
2220 let end = self.current_position();
2221 let span = Some(Span::from_positions(start, end));
2222 if let Some(text) = text {
2223 Some(LexedWordSegment::owned_with_spans(
2224 LexedWordSegmentKind::Plain,
2225 text,
2226 span,
2227 span,
2228 ))
2229 } else {
2230 Some(LexedWordSegment::borrowed_with_spans(
2231 LexedWordSegmentKind::Plain,
2232 &self.input[start.offset..end.offset],
2233 span,
2234 span,
2235 ))
2236 }
2237 }
2238
    /// Reads a word that starts with a plain double-quoted string (`"..."`).
    ///
    /// Thin wrapper around `read_double_quoted_word` with `dollar = false`.
    fn read_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(false)
    }
2242
    /// Reads a word that starts with a dollar-prefixed double-quoted string
    /// (`$"..."`).
    ///
    /// Thin wrapper around `read_double_quoted_word` with `dollar = true`.
    fn read_dollar_double_quoted_string(&mut self) -> Option<LexedToken<'a>> {
        self.read_double_quoted_word(true)
    }
2246
2247 fn read_double_quoted_word(&mut self, dollar: bool) -> Option<LexedToken<'a>> {
2248 let segment = match self.read_double_quoted_segment_with_dollar(dollar) {
2249 Ok(segment) => segment,
2250 Err(kind) => return Some(LexedToken::error(kind)),
2251 };
2252 let mut word = LexedWord::from_segment(segment);
2253 if let Err(kind) = self.append_segmented_continuation(&mut word) {
2254 return Some(LexedToken::error(kind));
2255 }
2256
2257 let kind = if word.single_segment().is_some() {
2258 TokenKind::QuotedWord
2259 } else {
2260 TokenKind::Word
2261 };
2262
2263 Some(LexedToken::with_word_payload(kind, word))
2264 }
2265
    /// Reads one plain double-quoted segment (`"..."`).
    ///
    /// Thin wrapper around `read_double_quoted_segment_with_dollar` with
    /// `dollar = false`; errors are passed through unchanged.
    fn read_double_quoted_segment(&mut self) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(false)
    }
2269
    /// Reads one dollar-prefixed double-quoted segment (`$"..."`).
    ///
    /// Thin wrapper around `read_double_quoted_segment_with_dollar` with
    /// `dollar = true`; errors are passed through unchanged.
    fn read_dollar_double_quoted_segment(
        &mut self,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        self.read_double_quoted_segment_with_dollar(true)
    }
2275
    /// Reads a double-quoted segment, `"..."` or (when `dollar` is true)
    /// `$"..."`, starting at its opening delimiter.
    ///
    /// The reader starts in a "simple" mode that skips ahead to the next
    /// character reported by `find_double_quote_special`. The first `\`, `$`
    /// or backtick switches it to the general mode, which handles escapes and
    /// nested expansions.
    ///
    /// The segment stays borrowed from the source for as long as `borrowable`
    /// remains true; escapes that alter the text, backticks, and parameter
    /// expansions whose reader returns `false` force an owned copy.
    ///
    /// # Errors
    ///
    /// Returns `LexerErrorKind::DoubleQuote` when the input ends before the
    /// closing quote.
    fn read_double_quoted_segment_with_dollar(
        &mut self,
        dollar: bool,
    ) -> Result<LexedWordSegment<'a>, LexerErrorKind> {
        if dollar {
            debug_assert_eq!(self.peek_char(), Some('$'));
            debug_assert_eq!(self.second_char(), Some('"'));
        } else {
            debug_assert_eq!(self.peek_char(), Some('"'));
        }

        let wrapper_start = self.current_position();
        // Step over the opening delimiter: `$"` or `"`.
        if dollar {
            self.consume_ascii_chars(2);
        } else {
            self.consume_ascii_chars(1);
        }
        let content_start = self.current_position();
        let mut content_end = content_start;
        // `simple`: no `\`, `$` or backtick seen yet, bulk scanning is allowed.
        let mut simple = self.reinject_buf.is_empty();
        // `borrowable`: the content can still be sliced straight out of `self.input`.
        let mut borrowable = self.reinject_buf.is_empty();
        // An owned capture buffer exists up front only while reinjected text is pending.
        let mut content = (!self.reinject_buf.is_empty()).then(|| String::with_capacity(16));
        let mut closed = false;

        while let Some(ch) = self.peek_char() {
            if simple {
                if self.reinject_buf.is_empty() {
                    let rest = self.cursor.rest();
                    match Self::find_double_quote_special(rest) {
                        // Jump over the uninteresting run in one step.
                        Some(index) if index > 0 => {
                            self.consume_source_bytes(index);
                            continue;
                        }
                        // Nothing special left, so there is no closing quote either.
                        None => {
                            self.consume_source_bytes(rest.len());
                            return Err(LexerErrorKind::DoubleQuote);
                        }
                        // A special character sits at the current position.
                        _ => {}
                    }
                }

                match ch {
                    '"' => {
                        content_end = self.current_position();
                        self.consume_ascii_chars(1);
                        closed = true;
                        break;
                    }
                    '\\' | '$' | '`' => {
                        // Leave simple mode without consuming the character;
                        // the general match below handles it.
                        simple = false;
                        if ch == '`' {
                            borrowable = false;
                            let capture_end = self.current_position();
                            self.ensure_capture_from_source(
                                &mut content,
                                content_start,
                                capture_end,
                            );
                        }
                    }
                    _ => {
                        self.advance();
                    }
                }
                if simple {
                    continue;
                }
            }

            match ch {
                '"' => {
                    // When the content is owned, `content_end` keeps the value
                    // set by the arm that handled the preceding text.
                    if borrowable {
                        content_end = self.current_position();
                    }
                    self.consume_ascii_chars(1);
                    closed = true;
                    break;
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(next) = self.peek_char() {
                        match next {
                            '\n' => {
                                // Line continuation: both characters are dropped
                                // from the content.
                                // NOTE(review): this arm does not update
                                // `content_end`, unlike most arms below —
                                // confirm the content span is still right when
                                // the string closes directly afterwards.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                self.advance();
                            }
                            '$' => {
                                // Escaped dollar, stored behind a NUL —
                                // presumably a marker so later stages do not
                                // treat it as an expansion; TODO confirm.
                                // NOTE(review): `content_end` is not updated
                                // here either.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                Self::push_capture_char(&mut content, '\x00');
                                Self::push_capture_char(&mut content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // The backslash is removed; `\\` and `` \` ``
                                // additionally get the NUL prefix, `\"` does not.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    &mut content,
                                    content_start,
                                    escape_start,
                                );
                                if next == '\\' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                if next == '`' {
                                    Self::push_capture_char(&mut content, '\x00');
                                }
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                            _ => {
                                // Any other escape is kept verbatim, so the
                                // content can remain borrowed.
                                Self::push_capture_char(&mut content, '\\');
                                Self::push_capture_char(&mut content, next);
                                self.advance();
                                content_end = self.current_position();
                            }
                        }
                    }
                }
                '$' => {
                    Self::push_capture_char(&mut content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        // The boolean results of these readers are ignored
                        // here; running out of input is reported by the
                        // `!closed` check after the loop.
                        if self.second_char() == Some('(') {
                            self.read_arithmetic_expansion_into(&mut content);
                        } else {
                            Self::push_capture_char(&mut content, '(');
                            self.advance();
                            self.read_command_subst_into(&mut content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(&mut content, '{');
                        self.advance();
                        // A `false` result from the reader ends borrowing.
                        borrowable &= self.read_param_expansion_into(&mut content, content_start);
                    }
                    content_end = self.current_position();
                }
                '`' => {
                    // Backtick substitution: copy through the closing backtick;
                    // a backslash protects the character that follows it.
                    borrowable = false;
                    let capture_end = self.current_position();
                    self.ensure_capture_from_source(&mut content, content_start, capture_end);
                    Self::push_capture_char(&mut content, '`');
                    self.advance();
                    while let Some(c) = self.peek_char() {
                        Self::push_capture_char(&mut content, c);
                        self.advance();
                        if c == '`' {
                            break;
                        }
                        if c == '\\'
                            && let Some(next) = self.peek_char()
                        {
                            Self::push_capture_char(&mut content, next);
                            self.advance();
                        }
                    }
                    content_end = self.current_position();
                }
                _ => {
                    Self::push_capture_char(&mut content, ch);
                    self.advance();
                    content_end = self.current_position();
                }
            }
        }

        if !closed {
            return Err(LexerErrorKind::DoubleQuote);
        }

        // The wrapper span includes the delimiters; the content span does not.
        let wrapper_span = Some(Span::from_positions(wrapper_start, self.current_position()));
        let content_span = Some(Span::from_positions(content_start, content_end));

        if borrowable {
            Ok(LexedWordSegment::borrowed_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                &self.input[content_start.offset..content_end.offset],
                content_span,
                wrapper_span,
            ))
        } else {
            Ok(LexedWordSegment::owned_with_spans(
                if dollar {
                    LexedWordSegmentKind::DollarDoubleQuoted
                } else {
                    LexedWordSegmentKind::DoubleQuoted
                },
                content.unwrap_or_default(),
                content_span,
                wrapper_span,
            ))
        }
    }
2483
2484 fn read_arithmetic_expansion_into(&mut self, content: &mut Option<String>) -> bool {
2485 debug_assert_eq!(self.peek_char(), Some('('));
2486 debug_assert_eq!(self.second_char(), Some('('));
2487
2488 Self::push_capture_char(content, '(');
2489 self.advance();
2490 Self::push_capture_char(content, '(');
2491 self.advance();
2492
2493 let mut depth = 2;
2494 while let Some(c) = self.peek_char() {
2495 match c {
2496 '\\' => {
2497 Self::push_capture_char(content, c);
2498 self.advance();
2499 if let Some(next) = self.peek_char() {
2500 Self::push_capture_char(content, next);
2501 self.advance();
2502 }
2503 }
2504 '\'' => {
2505 Self::push_capture_char(content, c);
2506 self.advance();
2507 while let Some(quoted) = self.peek_char() {
2508 Self::push_capture_char(content, quoted);
2509 self.advance();
2510 if quoted == '\'' {
2511 break;
2512 }
2513 }
2514 }
2515 '"' => {
2516 let mut escaped = false;
2517 Self::push_capture_char(content, c);
2518 self.advance();
2519 while let Some(quoted) = self.peek_char() {
2520 Self::push_capture_char(content, quoted);
2521 self.advance();
2522 if escaped {
2523 escaped = false;
2524 continue;
2525 }
2526 match quoted {
2527 '\\' => escaped = true,
2528 '"' => break,
2529 _ => {}
2530 }
2531 }
2532 }
2533 '`' => {
2534 let mut escaped = false;
2535 Self::push_capture_char(content, c);
2536 self.advance();
2537 while let Some(quoted) = self.peek_char() {
2538 Self::push_capture_char(content, quoted);
2539 self.advance();
2540 if escaped {
2541 escaped = false;
2542 continue;
2543 }
2544 match quoted {
2545 '\\' => escaped = true,
2546 '`' => break,
2547 _ => {}
2548 }
2549 }
2550 }
2551 '(' => {
2552 Self::push_capture_char(content, c);
2553 self.advance();
2554 depth += 1;
2555 }
2556 ')' => {
2557 Self::push_capture_char(content, c);
2558 self.advance();
2559 depth -= 1;
2560 if depth == 0 {
2561 return true;
2562 }
2563 }
2564 _ => {
2565 Self::push_capture_char(content, c);
2566 self.advance();
2567 }
2568 }
2569 }
2570
2571 false
2572 }
2573
2574 fn read_legacy_arithmetic_into(
2575 &mut self,
2576 content: &mut Option<String>,
2577 segment_start: Position,
2578 ) -> bool {
2579 let mut bracket_depth = 1;
2580
2581 while let Some(c) = self.peek_char() {
2582 match c {
2583 '\\' => {
2584 Self::push_capture_char(content, c);
2585 self.advance();
2586 if let Some(next) = self.peek_char() {
2587 Self::push_capture_char(content, next);
2588 self.advance();
2589 }
2590 }
2591 '\'' => {
2592 Self::push_capture_char(content, c);
2593 self.advance();
2594 while let Some(quoted) = self.peek_char() {
2595 Self::push_capture_char(content, quoted);
2596 self.advance();
2597 if quoted == '\'' {
2598 break;
2599 }
2600 }
2601 }
2602 '"' => {
2603 let mut escaped = false;
2604 Self::push_capture_char(content, c);
2605 self.advance();
2606 while let Some(quoted) = self.peek_char() {
2607 Self::push_capture_char(content, quoted);
2608 self.advance();
2609 if escaped {
2610 escaped = false;
2611 continue;
2612 }
2613 match quoted {
2614 '\\' => escaped = true,
2615 '"' => break,
2616 _ => {}
2617 }
2618 }
2619 }
2620 '`' => {
2621 let mut escaped = false;
2622 Self::push_capture_char(content, c);
2623 self.advance();
2624 while let Some(quoted) = self.peek_char() {
2625 Self::push_capture_char(content, quoted);
2626 self.advance();
2627 if escaped {
2628 escaped = false;
2629 continue;
2630 }
2631 match quoted {
2632 '\\' => escaped = true,
2633 '`' => break,
2634 _ => {}
2635 }
2636 }
2637 }
2638 '[' => {
2639 Self::push_capture_char(content, c);
2640 self.advance();
2641 bracket_depth += 1;
2642 }
2643 ']' => {
2644 Self::push_capture_char(content, c);
2645 self.advance();
2646 bracket_depth -= 1;
2647 if bracket_depth == 0 {
2648 return true;
2649 }
2650 }
2651 '$' => {
2652 Self::push_capture_char(content, c);
2653 self.advance();
2654 if self.peek_char() == Some('(') {
2655 if self.second_char() == Some('(') {
2656 if !self.read_arithmetic_expansion_into(content) {
2657 return false;
2658 }
2659 } else {
2660 Self::push_capture_char(content, '(');
2661 self.advance();
2662 if !self.read_command_subst_into(content) {
2663 return false;
2664 }
2665 }
2666 } else if self.peek_char() == Some('{') {
2667 Self::push_capture_char(content, '{');
2668 self.advance();
2669 if !self.read_param_expansion_into(content, segment_start) {
2670 return false;
2671 }
2672 } else if self.peek_char() == Some('[') {
2673 Self::push_capture_char(content, '[');
2674 self.advance();
2675 if !self.read_legacy_arithmetic_into(content, segment_start) {
2676 return false;
2677 }
2678 }
2679 }
2680 _ => {
2681 Self::push_capture_char(content, c);
2682 self.advance();
2683 }
2684 }
2685 }
2686
2687 false
2688 }
2689
2690 fn read_command_subst_into(&mut self, content: &mut Option<String>) -> bool {
2694 self.read_command_subst_into_depth(content, 0)
2695 }
2696
2697 fn flush_command_subst_keyword(
2698 current_word: &mut String,
2699 pending_case_headers: &mut usize,
2700 case_clause_depths: &mut SmallVec<[usize; 4]>,
2701 depth: usize,
2702 word_started_at_command_start: &mut bool,
2703 ) {
2704 if current_word.is_empty() {
2705 *word_started_at_command_start = false;
2706 return;
2707 }
2708
2709 match current_word.as_str() {
2710 "case" if *word_started_at_command_start => *pending_case_headers += 1,
2711 "in" if *pending_case_headers > 0 => {
2712 *pending_case_headers -= 1;
2713 case_clause_depths.push(depth);
2714 }
2715 "esac" if *word_started_at_command_start => {
2716 case_clause_depths.pop();
2717 }
2718 _ => {}
2719 }
2720
2721 current_word.clear();
2722 *word_started_at_command_start = false;
2723 }
2724
2725 fn read_command_subst_heredoc_delimiter_into(
2726 &mut self,
2727 content: &mut Option<String>,
2728 ) -> Option<String> {
2729 while let Some(ch) = self.peek_char() {
2730 if !matches!(ch, ' ' | '\t') {
2731 break;
2732 }
2733 Self::push_capture_char(content, ch);
2734 self.advance();
2735 }
2736
2737 let mut cooked = String::new();
2738 let mut in_single = false;
2739 let mut in_double = false;
2740 let mut escaped = false;
2741 let mut saw_any = false;
2742
2743 while let Some(ch) = self.peek_char() {
2744 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
2745 break;
2746 }
2747
2748 saw_any = true;
2749 Self::push_capture_char(content, ch);
2750 self.advance();
2751
2752 if escaped {
2753 cooked.push(ch);
2754 escaped = false;
2755 continue;
2756 }
2757
2758 match ch {
2759 '\\' if !in_single => escaped = true,
2760 '\'' if !in_double => in_single = !in_single,
2761 '"' if !in_single => in_double = !in_double,
2762 _ => cooked.push(ch),
2763 }
2764 }
2765
2766 saw_any.then_some(cooked)
2767 }
2768
2769 fn read_command_subst_backtick_segment_into(&mut self, content: &mut Option<String>) {
2770 Self::push_capture_char(content, '`');
2771 self.advance();
2772 while let Some(ch) = self.peek_char() {
2773 Self::push_capture_char(content, ch);
2774 self.advance();
2775 if ch == '\\' {
2776 if let Some(esc) = self.peek_char() {
2777 Self::push_capture_char(content, esc);
2778 self.advance();
2779 }
2780 continue;
2781 }
2782 if ch == '`' {
2783 break;
2784 }
2785 }
2786 }
2787
2788 fn read_command_subst_pending_heredoc_into(
2789 &mut self,
2790 content: &mut Option<String>,
2791 delimiter: &str,
2792 strip_tabs: bool,
2793 ) -> bool {
2794 loop {
2795 let mut line = String::new();
2796 let mut saw_newline = false;
2797
2798 while let Some(ch) = self.peek_char() {
2799 self.advance();
2800 if ch == '\n' {
2801 saw_newline = true;
2802 break;
2803 }
2804 line.push(ch);
2805 }
2806
2807 Self::push_capture_str(content, &line);
2808 if saw_newline {
2809 Self::push_capture_char(content, '\n');
2810 }
2811
2812 if heredoc_line_matches_delimiter(&line, delimiter, strip_tabs) || !saw_newline {
2813 return true;
2814 }
2815 }
2816 }
2817
    /// Reads the body of a command substitution up to the `)` that balances the
    /// already-consumed opening parenthesis, mirroring what it consumes into
    /// `content`.
    ///
    /// `subst_depth` counts how many command substitutions this call is nested
    /// inside; it is incremented for `$(` found inside double quotes. Once it
    /// reaches `self.max_subst_depth`, the body is skipped by plain parenthesis
    /// counting instead of being scanned in detail.
    ///
    /// The detailed scan tracks just enough shell structure to find the right
    /// closing parenthesis:
    /// * comments, quotes, backticks and backslash escapes are copied through
    ///   without their parentheses being counted;
    /// * heredoc delimiters are remembered and their bodies are copied verbatim
    ///   after the next newline;
    /// * `case ... in` clauses are tracked so that a `)` at the recorded depth is
    ///   captured without closing a parenthesis level.
    ///
    /// Returns `true` when the balancing `)` was consumed, and `false` when the
    /// input ended first or a nested reader returned `false`.
    fn read_command_subst_into_depth(
        &mut self,
        content: &mut Option<String>,
        subst_depth: usize,
    ) -> bool {
        if subst_depth >= self.max_subst_depth {
            // Nesting limit reached: consume up to the balancing `)` by counting
            // parentheses only. Nothing except that final `)` is pushed into
            // `content`, and quotes or comments are not interpreted here.
            let mut depth = 1;
            while let Some(c) = self.peek_char() {
                self.advance();
                match c {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            Self::push_capture_char(content, ')');
                            return true;
                        }
                    }
                    _ => {}
                }
            }
            return false;
        }

        // Open parentheses, counting the one that started this substitution.
        let mut depth = 1;
        // `(delimiter, strip_tabs)` pairs whose bodies start after the next newline.
        let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
        // `case` keywords seen whose matching `in` has not been seen yet.
        let mut pending_case_headers = 0usize;
        // Value of `depth` at each `in` that completed a `case` header.
        let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
        // Run of alphanumeric/underscore characters currently being accumulated.
        let mut current_word = String::with_capacity(16);
        // Whether the next word would be in command position.
        let mut at_command_start = true;
        // Set after a redirection operator until its target has been seen.
        let mut expecting_redirection_target = false;
        // Whether `current_word` began in command position.
        let mut current_word_started_at_command_start = false;
        while let Some(c) = self.peek_char() {
            match c {
                // Comment: copy through the end of the line.
                '#' if !self.should_treat_hash_as_word_char() => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '#');
                    self.advance();
                    while let Some(comment_ch) = self.peek_char() {
                        Self::push_capture_char(content, comment_ch);
                        self.advance();
                        if comment_ch == '\n' {
                            // The newline ending the comment also starts any
                            // heredoc bodies that are still pending.
                            for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                                if !self.read_command_subst_pending_heredoc_into(
                                    content, &delimiter, strip_tabs,
                                ) {
                                    return false;
                                }
                            }
                            at_command_start = true;
                            expecting_redirection_target = false;
                            break;
                        }
                    }
                }
                '(' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    depth += 1;
                    Self::push_capture_char(content, c);
                    self.advance();
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                ')' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    // Inside a `case` clause recorded at this depth, the `)` is
                    // captured without closing a parenthesis level.
                    if case_clause_depths
                        .last()
                        .is_some_and(|case_depth| *case_depth == depth)
                    {
                        Self::push_capture_char(content, ')');
                        self.advance();
                        at_command_start = true;
                        expecting_redirection_target = false;
                        continue;
                    }
                    depth -= 1;
                    self.advance();
                    if depth == 0 {
                        // This is the `)` that ends the substitution.
                        Self::push_capture_char(content, ')');
                        return true;
                    }
                    Self::push_capture_char(content, c);
                    at_command_start = false;
                    expecting_redirection_target = false;
                }
                '"' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '"');
                    self.advance();
                    // Copy the double-quoted string. Only backslash pairs and
                    // `$(`/`$((` get special handling inside it.
                    while let Some(qc) = self.peek_char() {
                        match qc {
                            '"' => {
                                Self::push_capture_char(content, '"');
                                self.advance();
                                break;
                            }
                            '\\' => {
                                Self::push_capture_char(content, '\\');
                                self.advance();
                                if let Some(esc) = self.peek_char() {
                                    Self::push_capture_char(content, esc);
                                    self.advance();
                                }
                            }
                            '$' => {
                                Self::push_capture_char(content, '$');
                                self.advance();
                                if self.peek_char() == Some('(') {
                                    if self.second_char() == Some('(') {
                                        if !self.read_arithmetic_expansion_into(content) {
                                            return false;
                                        }
                                    } else {
                                        Self::push_capture_char(content, '(');
                                        self.advance();
                                        // Nested substitution: one level deeper.
                                        if !self
                                            .read_command_subst_into_depth(content, subst_depth + 1)
                                        {
                                            return false;
                                        }
                                    }
                                }
                            }
                            _ => {
                                Self::push_capture_char(content, qc);
                                self.advance();
                            }
                        }
                    }
                    // A quoted string either satisfies a pending redirection
                    // target or counts as a word of the current command.
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                '\'' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    // Single quotes: everything up to the next `'` is literal.
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                '`' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    self.read_command_subst_backtick_segment_into(content);
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `$'...'`: like single quotes, except that a backslash protects
                // the character that follows it.
                '$' if self.second_char() == Some('\'') => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '$');
                    self.advance();
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    while let Some(qc) = self.peek_char() {
                        Self::push_capture_char(content, qc);
                        self.advance();
                        if qc == '\\' {
                            if let Some(esc) = self.peek_char() {
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            continue;
                        }
                        if qc == '\'' {
                            break;
                        }
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // Backslash outside quotes: copy it together with the next character.
                '\\' => {
                    let had_word = !current_word.is_empty();
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    Self::push_capture_char(content, '\\');
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        Self::push_capture_char(content, esc);
                        self.advance();
                    }
                    if expecting_redirection_target {
                        expecting_redirection_target = false;
                    } else {
                        at_command_start = false;
                    }
                }
                // `<<`, `<<-` or `<<<`.
                '<' if self.second_char() == Some('<') => {
                    // An all-digit word that began in command position directly
                    // before the operator is treated as a file-descriptor prefix,
                    // so the command-start state is restored below.
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }

                    Self::push_capture_char(content, '<');
                    self.advance();
                    Self::push_capture_char(content, '<');
                    self.advance();

                    // `<<<`: no heredoc body is queued; only a target is expected.
                    if self.peek_char() == Some('<') {
                        Self::push_capture_char(content, '<');
                        self.advance();
                        expecting_redirection_target = true;
                        continue;
                    }

                    // `<<-` sets the `strip_tabs` flag stored with the delimiter.
                    let strip_tabs = if self.peek_char() == Some('-') {
                        Self::push_capture_char(content, '-');
                        self.advance();
                        true
                    } else {
                        false
                    };

                    // Queue the heredoc; its body is read after the next newline.
                    if let Some(delimiter) = self.read_command_subst_heredoc_delimiter_into(content)
                    {
                        pending_heredocs.push((delimiter, strip_tabs));
                        expecting_redirection_target = false;
                    } else {
                        expecting_redirection_target = true;
                    }
                }
                // Any other redirection operator character.
                '>' | '<' => {
                    let word_was_redirection_fd = current_word_started_at_command_start
                        && !current_word.is_empty()
                        && current_word.chars().all(|current| current.is_ascii_digit());
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if word_was_redirection_fd {
                        at_command_start = true;
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                    expecting_redirection_target = true;
                }
                '\n' => {
                    Self::flush_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    Self::push_capture_char(content, '\n');
                    self.advance();
                    // Bodies of all heredocs queued on this line follow immediately.
                    for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                        if !self.read_command_subst_pending_heredoc_into(
                            content, &delimiter, strip_tabs,
                        ) {
                            return false;
                        }
                    }
                    at_command_start = true;
                    expecting_redirection_target = false;
                }
                _ => {
                    if c.is_ascii_alphanumeric() || c == '_' {
                        // Extend the current word. A word begins in command
                        // position only when it is not a redirection target.
                        if current_word.is_empty()
                            && !expecting_redirection_target
                            && at_command_start
                        {
                            current_word_started_at_command_start = true;
                            at_command_start = false;
                        }
                        current_word.push(c);
                    } else {
                        let had_word = !current_word.is_empty();
                        Self::flush_command_subst_keyword(
                            &mut current_word,
                            &mut pending_case_headers,
                            &mut case_clause_depths,
                            depth,
                            &mut current_word_started_at_command_start,
                        );
                        if had_word && expecting_redirection_target {
                            expecting_redirection_target = false;
                        }
                        match c {
                            // Blanks change nothing beyond ending the word.
                            ' ' | '\t' => {}
                            // Command separators put the next word in command position.
                            ';' | '|' | '&' => {
                                at_command_start = true;
                                expecting_redirection_target = false;
                            }
                            _ => {
                                if !expecting_redirection_target {
                                    at_command_start = false;
                                }
                            }
                        }
                    }
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }

        // Ran out of input before the substitution was closed.
        false
    }
3218
    /// Reads the body of a `${...}` parameter expansion, up to the `}` that
    /// closes it, mirroring what it consumes into `content`.
    ///
    /// The brace depth starts at 1, so the opening `${` is expected to have been
    /// consumed already (the recursive call below pushes the `{` before
    /// recursing).
    ///
    /// `segment_start` is passed to `ensure_capture_from_source` whenever an
    /// escape is rewritten — presumably so the text consumed so far can be
    /// materialised into `content` first; confirm against that helper.
    ///
    /// Returns the "borrowable" flag, not a success flag: `false` when an escape
    /// was rewritten (the captured text no longer matches the source), when a
    /// nested `$((` reader returned `false`, or when a nested `${` returned
    /// `false`. Reaching the end of input without a closing `}` is not reported
    /// separately.
    fn read_param_expansion_into(
        &mut self,
        content: &mut Option<String>,
        segment_start: Position,
    ) -> bool {
        let mut borrowable = true;
        // Open `${` levels handled by this call.
        let mut depth = 1;
        // Unquoted `{` characters seen that are not part of a `${`.
        let mut literal_brace_depth = 0usize;
        let mut in_single = false;
        let mut in_double = false;
        // Value of `depth` when the current double quote was opened (0 when closed).
        let mut double_quote_depth = 0usize;
        while let Some(c) = self.peek_char() {
            if in_single {
                match c {
                    '\\' => {
                        let escape_start = self.current_position();
                        if self.second_char() == Some('"') {
                            // `\"` inside single quotes: the backslash is dropped
                            // and only the `"` is captured.
                            self.advance();
                            borrowable = false;
                            self.ensure_capture_from_source(content, segment_start, escape_start);
                            Self::push_capture_char(content, '"');
                            self.advance();
                        } else {
                            Self::push_capture_char(content, '\\');
                            self.advance();
                        }
                    }
                    '\'' => {
                        Self::push_capture_char(content, c);
                        self.advance();
                        in_single = false;
                    }
                    _ => {
                        Self::push_capture_char(content, c);
                        self.advance();
                    }
                }
                continue;
            }

            match c {
                // A `}` counts unless it sits inside double quotes that were
                // opened at the current depth.
                '}' if !in_single && (!in_double || depth > double_quote_depth) => {
                    self.advance();
                    Self::push_capture_char(content, '}');
                    // With a literal `{` still open and another closer available
                    // later, this `}` pairs with the literal brace instead.
                    if depth == 1
                        && literal_brace_depth > 0
                        && self.has_later_top_level_param_expansion_closer(depth)
                    {
                        literal_brace_depth -= 1;
                        continue;
                    }
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                '{' if !in_single && !in_double => {
                    literal_brace_depth += 1;
                    Self::push_capture_char(content, '{');
                    self.advance();
                }
                '"' => {
                    Self::push_capture_char(content, '"');
                    self.advance();
                    in_double = !in_double;
                    double_quote_depth = if in_double { depth } else { 0 };
                }
                '\'' => {
                    Self::push_capture_char(content, '\'');
                    self.advance();
                    // A `'` inside double quotes is captured but opens nothing.
                    if !in_double {
                        in_single = true;
                    }
                }
                '\\' => {
                    let escape_start = self.current_position();
                    self.advance();
                    if let Some(esc) = self.peek_char() {
                        match esc {
                            '$' => {
                                // `\$` is captured as NUL followed by `$`.
                                // NOTE(review): the NUL looks like a marker for a
                                // later stage; its meaning is not visible here.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, '\x00');
                                Self::push_capture_char(content, '$');
                                self.advance();
                            }
                            '"' | '\\' | '`' => {
                                // The backslash is dropped; only the escaped
                                // character is captured.
                                borrowable = false;
                                self.ensure_capture_from_source(
                                    content,
                                    segment_start,
                                    escape_start,
                                );
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                            '}' => {
                                // `\}` is kept verbatim and also balances one
                                // literal `{`, if any is open.
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, '}');
                                self.advance();
                                literal_brace_depth = literal_brace_depth.saturating_sub(1);
                            }
                            _ => {
                                // Any other escape is kept verbatim.
                                Self::push_capture_char(content, '\\');
                                Self::push_capture_char(content, esc);
                                self.advance();
                            }
                        }
                    } else {
                        // Trailing backslash at end of input.
                        Self::push_capture_char(content, '\\');
                    }
                }
                '$' => {
                    Self::push_capture_char(content, '$');
                    self.advance();
                    if self.peek_char() == Some('(') {
                        if self.second_char() == Some('(') {
                            if !self.read_arithmetic_expansion_into(content) {
                                borrowable = false;
                            }
                        } else {
                            Self::push_capture_char(content, '(');
                            self.advance();
                            // The result of the command-substitution reader is
                            // ignored here.
                            self.read_command_subst_into(content);
                        }
                    } else if self.peek_char() == Some('{') {
                        Self::push_capture_char(content, '{');
                        self.advance();
                        borrowable &= self.read_param_expansion_into(content, segment_start);
                    }
                }
                _ => {
                    Self::push_capture_char(content, c);
                    self.advance();
                }
            }
        }
        borrowable
    }
3369
3370 fn has_later_top_level_param_expansion_closer(&self, target_depth: usize) -> bool {
3371 let mut chars = self.lookahead_chars().peekable();
3372 let mut depth = target_depth;
3373 let mut in_single = false;
3374 let mut in_double = false;
3375 let mut double_quote_depth = 0usize;
3376
3377 while let Some(ch) = chars.next() {
3378 if in_single {
3379 match ch {
3380 '\'' => in_single = false,
3381 '\\' if chars.peek() == Some(&'"') => {
3382 chars.next();
3383 }
3384 '\\' => {}
3385 _ => {}
3386 }
3387 continue;
3388 }
3389
3390 if in_double {
3391 match ch {
3392 '"' => {
3393 in_double = false;
3394 double_quote_depth = 0;
3395 }
3396 '\\' => {
3397 chars.next();
3398 }
3399 '$' if chars.peek() == Some(&'{') => {
3400 chars.next();
3401 depth += 1;
3402 }
3403 '}' if depth > double_quote_depth => {
3404 depth -= 1;
3405 }
3406 _ => {}
3407 }
3408 continue;
3409 }
3410
3411 match ch {
3412 '\n' if depth == target_depth => return false,
3413 '\'' => in_single = true,
3414 '"' => {
3415 in_double = true;
3416 double_quote_depth = depth;
3417 }
3418 '\\' => {
3419 chars.next();
3420 }
3421 '$' if chars.peek() == Some(&'{') => {
3422 chars.next();
3423 depth += 1;
3424 }
3425 '}' => {
3426 if depth == target_depth {
3427 return true;
3428 }
3429 depth -= 1;
3430 }
3431 _ => {}
3432 }
3433 }
3434
3435 false
3436 }
3437
3438 fn looks_like_brace_expansion(&self) -> bool {
3444 const MAX_LOOKAHEAD: usize = 10_000;
3445
3446 let mut chars = self.lookahead_chars();
3447
3448 if chars.next() != Some('{') {
3450 return false;
3451 }
3452
3453 let mut depth = 1;
3454 let mut paren_depth = 0usize;
3455 let mut has_comma = false;
3456 let mut has_dot_dot = false;
3457 let mut escaped = false;
3458 let mut in_single = false;
3459 let mut in_double = false;
3460 let mut in_backtick = false;
3461 let mut prev_char = None;
3462 let mut scanned = 0usize;
3463
3464 for ch in chars {
3465 scanned += 1;
3466 if scanned > MAX_LOOKAHEAD {
3467 return false;
3468 }
3469
3470 let brace_surface_active = !in_single && !in_double && !in_backtick;
3471 let at_top_level = depth == 1 && paren_depth == 0 && brace_surface_active;
3472
3473 match ch {
3474 _ if escaped => {
3475 escaped = false;
3476 }
3477 '\\' if !in_single => escaped = true,
3478 '\'' if !in_double && !in_backtick => in_single = !in_single,
3479 '"' if !in_single && !in_backtick => in_double = !in_double,
3480 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3481 '(' if brace_surface_active && (paren_depth > 0 || prev_char == Some('$')) => {
3482 paren_depth += 1
3483 }
3484 ')' if brace_surface_active && paren_depth > 0 => paren_depth -= 1,
3485 '{' if !in_single && !in_double && !in_backtick => depth += 1,
3486 '}' if !in_single && !in_double && !in_backtick => {
3487 depth -= 1;
3488 if depth == 0 {
3489 return has_comma || has_dot_dot;
3491 }
3492 }
3493 ',' if at_top_level => has_comma = true,
3494 '.' if at_top_level && prev_char == Some('.') => has_dot_dot = true,
3495 ' ' | '\t' | '\n' | ';' if at_top_level => return false,
3497 _ => {}
3498 }
3499 prev_char = Some(ch);
3500 }
3501
3502 false
3503 }
3504
3505 fn consume_mid_word_brace_segment(&mut self, word: &mut Option<String>) {
3506 let mut brace_depth = 1usize;
3507 let mut paren_depth = 0usize;
3508 let mut escaped = false;
3509 let mut in_single = false;
3510 let mut in_double = false;
3511 let mut in_backtick = false;
3512 let mut prev_char = None;
3513
3514 while let Some(ch) = self.peek_char() {
3515 Self::push_capture_char(word, ch);
3516 self.advance();
3517
3518 if escaped {
3519 escaped = false;
3520 prev_char = Some(ch);
3521 continue;
3522 }
3523
3524 match ch {
3525 '\\' if !in_single => escaped = true,
3526 '\'' if !in_double && !in_backtick => in_single = !in_single,
3527 '"' if !in_single && !in_backtick => in_double = !in_double,
3528 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3529 '(' if !in_single
3530 && !in_double
3531 && !in_backtick
3532 && (paren_depth > 0 || prev_char == Some('$')) =>
3533 {
3534 paren_depth += 1
3535 }
3536 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3537 paren_depth -= 1
3538 }
3539 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3540 '}' if !in_single && !in_double && !in_backtick => {
3541 brace_depth -= 1;
3542 if brace_depth == 0 {
3543 break;
3544 }
3545 }
3546 _ => {}
3547 }
3548
3549 prev_char = Some(ch);
3550 }
3551 }
3552
3553 fn consume_brace_word_body(&mut self, word: &mut String) {
3554 let mut brace_depth = 1usize;
3555 let mut paren_depth = 0usize;
3556 let mut escaped = false;
3557 let mut in_single = false;
3558 let mut in_double = false;
3559 let mut in_backtick = false;
3560 let mut prev_char = None;
3561
3562 while let Some(ch) = self.peek_char() {
3563 word.push(ch);
3564 self.advance();
3565
3566 if escaped {
3567 escaped = false;
3568 prev_char = Some(ch);
3569 continue;
3570 }
3571
3572 match ch {
3573 '\\' if !in_single => escaped = true,
3574 '\'' if !in_double && !in_backtick => in_single = !in_single,
3575 '"' if !in_single && !in_backtick => in_double = !in_double,
3576 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3577 '(' if !in_single
3578 && !in_double
3579 && !in_backtick
3580 && (paren_depth > 0 || prev_char == Some('$')) =>
3581 {
3582 paren_depth += 1
3583 }
3584 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3585 paren_depth -= 1
3586 }
3587 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3588 '}' if !in_single && !in_double && !in_backtick => {
3589 brace_depth -= 1;
3590 if brace_depth == 0 {
3591 break;
3592 }
3593 }
3594 _ => {}
3595 }
3596
3597 prev_char = Some(ch);
3598 }
3599 }
3600
3601 fn looks_like_mid_word_brace_segment(&self) -> bool {
3604 const MAX_LOOKAHEAD: usize = 10_000;
3605
3606 let mut chars = self.lookahead_chars();
3607 if chars.next() != Some('{') {
3608 return false;
3609 }
3610
3611 let mut brace_depth = 1;
3612 let mut paren_depth = 0usize;
3613 let mut escaped = false;
3614 let mut in_single = false;
3615 let mut in_double = false;
3616 let mut in_backtick = false;
3617 let mut prev_char = None;
3618 let mut scanned = 0usize;
3619
3620 for ch in chars {
3621 scanned += 1;
3622 if scanned > MAX_LOOKAHEAD {
3623 return false;
3624 }
3625
3626 if !in_single
3627 && !in_double
3628 && !in_backtick
3629 && !escaped
3630 && brace_depth == 1
3631 && paren_depth == 0
3632 && matches!(ch, ' ' | '\t' | '\n' | ';' | '|' | '&' | '<' | '>')
3633 {
3634 return false;
3635 }
3636
3637 if escaped {
3638 escaped = false;
3639 prev_char = Some(ch);
3640 continue;
3641 }
3642
3643 match ch {
3644 '\\' => escaped = true,
3645 '\'' if !in_double && !in_backtick => in_single = !in_single,
3646 '"' if !in_single && !in_backtick => in_double = !in_double,
3647 '`' if !in_single && !in_double => in_backtick = !in_backtick,
3648 '(' if !in_single
3649 && !in_double
3650 && !in_backtick
3651 && (paren_depth > 0 || prev_char == Some('$')) =>
3652 {
3653 paren_depth += 1
3654 }
3655 ')' if !in_single && !in_double && !in_backtick && paren_depth > 0 => {
3656 paren_depth -= 1
3657 }
3658 '{' if !in_single && !in_double && !in_backtick => brace_depth += 1,
3659 '}' if !in_single && !in_double && !in_backtick => {
3660 brace_depth -= 1;
3661 if brace_depth == 0 {
3662 return true;
3663 }
3664 }
3665 _ => {}
3666 }
3667
3668 prev_char = Some(ch);
3669 }
3670
3671 false
3672 }
3673
3674 fn is_brace_group_start(&self) -> bool {
3676 let mut chars = self.lookahead_chars();
3677 if chars.next() != Some('{') {
3679 return false;
3680 }
3681 matches!(chars.next(), Some(' ') | Some('\t') | Some('\n') | None)
3683 }
3684
3685 fn escaped_brace_sequence_looks_like_brace_expansion(&self) -> bool {
3688 const MAX_LOOKAHEAD: usize = 10_000;
3689
3690 let mut chars = self.lookahead_chars();
3691 let mut depth = 1;
3692 let mut has_comma = false;
3693 let mut has_dot_dot = false;
3694 let mut prev_char = None;
3695 let mut scanned = 0usize;
3696
3697 for ch in chars.by_ref() {
3698 scanned += 1;
3699 if scanned > MAX_LOOKAHEAD {
3700 return false;
3701 }
3702 match ch {
3703 '{' => depth += 1,
3704 '}' => {
3705 depth -= 1;
3706 if depth == 0 {
3707 return has_comma || has_dot_dot;
3708 }
3709 }
3710 ',' if depth == 1 => has_comma = true,
3711 '.' if prev_char == Some('.') && depth == 1 => has_dot_dot = true,
3712 ' ' | '\t' | '\n' | ';' if depth == 1 => return false,
3713 _ => {}
3714 }
3715 prev_char = Some(ch);
3716 }
3717
3718 false
3719 }
3720
3721 fn brace_literal_starts_case_pattern_delimiter(&self) -> bool {
3722 let mut chars = self.lookahead_chars();
3723 if chars.next() != Some('{') {
3724 return false;
3725 }
3726 chars.next() == Some(')')
3727 }
3728
3729 fn read_brace_literal_word(&mut self) -> Option<LexedToken<'a>> {
3731 let mut word = String::with_capacity(16);
3732
3733 if let Some('{') = self.peek_char() {
3734 word.push('{');
3735 self.advance();
3736 } else {
3737 return None;
3738 }
3739
3740 self.consume_brace_word_body(&mut word);
3741
3742 while let Some(ch) = self.peek_char() {
3743 if Self::is_word_char(ch) {
3744 if self.reinject_buf.is_empty() {
3745 let chunk = self.cursor.eat_while(Self::is_word_char);
3746 word.push_str(chunk);
3747 self.advance_scanned_source_bytes(chunk.len());
3748 } else {
3749 word.push(ch);
3750 self.advance();
3751 }
3752 } else {
3753 break;
3754 }
3755 }
3756
3757 Some(LexedToken::owned_word(TokenKind::Word, word))
3758 }
3759
3760 fn read_brace_expansion_word(&mut self) -> Option<LexedToken<'a>> {
3762 let mut word = String::with_capacity(16);
3763
3764 if let Some('{') = self.peek_char() {
3766 word.push('{');
3767 self.advance();
3768 } else {
3769 return None;
3770 }
3771
3772 self.consume_brace_word_body(&mut word);
3774
3775 while let Some(ch) = self.peek_char() {
3777 if Self::is_word_char(ch) || matches!(ch, '{' | '}') {
3778 if ch == '{' {
3779 word.push(ch);
3781 self.advance();
3782 self.consume_brace_word_body(&mut word);
3783 } else {
3784 word.push(ch);
3785 self.advance();
3786 }
3787 } else {
3788 break;
3789 }
3790 }
3791
3792 Some(LexedToken::owned_word(TokenKind::Word, word))
3793 }
3794
3795 fn looks_like_assoc_assign(&self) -> bool {
3799 let mut chars = self.lookahead_chars();
3800 if chars.next() != Some('(') {
3802 return false;
3803 }
3804 for ch in chars {
3806 match ch {
3807 ' ' | '\t' => continue,
3808 '[' => return true,
3809 _ => return false,
3810 }
3811 }
3812 false
3813 }
3814
3815 fn word_can_take_parenthesized_suffix(text: &str) -> bool {
3816 text.ends_with(['@', '?', '*', '+', '!']) || Self::looks_like_zsh_glob_qualifier_base(text)
3817 }
3818
3819 fn lexed_word_can_take_parenthesized_suffix(word: &LexedWord<'_>) -> bool {
3820 word.segments().any(|segment| {
3821 matches!(
3822 segment.kind(),
3823 LexedWordSegmentKind::SingleQuoted
3824 | LexedWordSegmentKind::DollarSingleQuoted
3825 | LexedWordSegmentKind::DoubleQuoted
3826 | LexedWordSegmentKind::DollarDoubleQuoted
3827 )
3828 }) || Self::word_can_take_parenthesized_suffix(&word.joined_text())
3829 }
3830
3831 fn looks_like_zsh_glob_qualifier_base(text: &str) -> bool {
3832 text.contains(['*', '?'])
3833 || text.ends_with('}') && text.contains("${")
3834 || text.ends_with(']')
3835 && text
3836 .rfind('[')
3837 .is_some_and(|open_bracket| !text[..open_bracket].ends_with('$'))
3838 }
3839
3840 fn is_word_char(ch: char) -> bool {
3841 !matches!(
3842 ch,
3843 ' ' | '\t' | '\n' | ';' | '|' | '&' | '>' | '<' | '(' | ')' | '{' | '}' | '\'' | '"'
3844 )
3845 }
3846
3847 const fn is_ascii_word_byte(byte: u8) -> bool {
3848 !matches!(
3849 byte,
3850 b' ' | b'\t'
3851 | b'\n'
3852 | b';'
3853 | b'|'
3854 | b'&'
3855 | b'>'
3856 | b'<'
3857 | b'('
3858 | b')'
3859 | b'{'
3860 | b'}'
3861 | b'\''
3862 | b'"'
3863 )
3864 }
3865
3866 const fn is_ascii_plain_word_byte(byte: u8) -> bool {
3867 Self::is_ascii_word_byte(byte) && !matches!(byte, b'$' | b'{' | b'`' | b'\\')
3868 }
3869
3870 fn is_plain_word_char(ch: char) -> bool {
3871 Self::is_word_char(ch) && !matches!(ch, '$' | '{' | '`' | '\\')
3872 }
3873
3874 pub(super) fn read_heredoc(&mut self, delimiter: &str, strip_tabs: bool) -> HeredocRead {
3876 let mut content = String::with_capacity(64);
3877 let mut current_line = String::with_capacity(64);
3878
3879 let mut rest_of_line = String::with_capacity(32);
3886 let rest_of_line_start = self.current_position();
3887 let mut in_double_quote = false;
3888 let mut in_single_quote = false;
3889 let mut in_comment = false;
3890 let mut saw_non_whitespace_tail = false;
3891 let mut consecutive_backslashes = 0usize;
3892 let mut previous_tail_char = None;
3893 while let Some(ch) = self.peek_char() {
3894 self.advance();
3895 if in_comment {
3896 if ch == '\n' {
3897 break;
3898 }
3899 rest_of_line.push(ch);
3900 previous_tail_char = Some(ch);
3901 continue;
3902 }
3903 if ch == '#'
3904 && !in_single_quote
3905 && !in_double_quote
3906 && self.comments_enabled()
3907 && heredoc_tail_hash_starts_comment(previous_tail_char)
3908 {
3909 in_comment = true;
3910 rest_of_line.push(ch);
3911 previous_tail_char = Some(ch);
3912 consecutive_backslashes = 0;
3913 continue;
3914 }
3915 let backslash_continues_line = ch == '\\'
3916 && !in_single_quote
3917 && self.peek_char() == Some('\n')
3918 && (saw_non_whitespace_tail || self.heredoc_tail_line_join_stays_in_tail())
3919 && consecutive_backslashes.is_multiple_of(2);
3920 if backslash_continues_line {
3921 rest_of_line.push(ch);
3922 rest_of_line.push('\n');
3923 self.advance();
3924 consecutive_backslashes = 0;
3925 continue;
3926 }
3927 if ch == '\n' && !in_double_quote && !in_single_quote {
3928 break;
3929 }
3930 if ch == '"' && !in_single_quote {
3931 in_double_quote = !in_double_quote;
3932 } else if ch == '\'' && !in_double_quote {
3933 in_single_quote = !in_single_quote;
3934 } else if ch == '\\' && in_double_quote {
3935 rest_of_line.push(ch);
3937 if let Some(next) = self.peek_char() {
3938 rest_of_line.push(next);
3939 self.advance();
3940 }
3941 continue;
3942 }
3943 rest_of_line.push(ch);
3944 if !ch.is_whitespace() {
3945 saw_non_whitespace_tail = true;
3946 }
3947 if ch == '\\' && !in_single_quote {
3948 consecutive_backslashes += 1;
3949 } else {
3950 consecutive_backslashes = 0;
3951 }
3952 previous_tail_char = Some(ch);
3953 }
3954
3955 self.sync_offset_to_cursor();
3959 let content_start = self.current_position();
3960 let mut current_line_start = content_start;
3961 let content_end;
3962
3963 loop {
3965 if self.reinject_buf.is_empty() {
3966 self.sync_offset_to_cursor();
3972 let rest = self.cursor.rest();
3973 if rest.is_empty() {
3974 content_end = self.current_position();
3975 break;
3976 }
3977
3978 let line_len = self.cursor.find_byte(b'\n').unwrap_or(rest.len());
3979 let line = &rest[..line_len];
3980 let has_newline = line_len < rest.len();
3981
3982 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) {
3983 content_end = current_line_start;
3984 self.consume_source_bytes(line_len);
3985 if has_newline {
3986 self.consume_ascii_chars(1);
3987 }
3988 break;
3989 }
3990
3991 content.push_str(line);
3992 self.consume_source_bytes(line_len);
3993
3994 if has_newline {
3995 self.consume_ascii_chars(1);
3996 content.push('\n');
3997 current_line_start = self.current_position();
3998 continue;
3999 }
4000
4001 content_end = self.current_position();
4002 break;
4003 }
4004
4005 match self.peek_char() {
4006 Some('\n') => {
4007 self.advance();
4008 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4010 content_end = current_line_start;
4011 break;
4012 }
4013 content.push_str(¤t_line);
4014 content.push('\n');
4015 current_line.clear();
4016 current_line_start = self.current_position();
4017 }
4018 Some(ch) => {
4019 current_line.push(ch);
4020 self.advance();
4021 }
4022 None => {
4023 if heredoc_line_matches_delimiter(¤t_line, delimiter, strip_tabs) {
4025 content_end = current_line_start;
4026 break;
4027 }
4028 if !current_line.is_empty() {
4029 content.push_str(¤t_line);
4030 }
4031 content_end = self.current_position();
4032 break;
4033 }
4034 }
4035 }
4036
4037 let post_heredoc_offset = self.offset;
4042 self.offset = rest_of_line_start.offset;
4043 for ch in rest_of_line.chars() {
4044 self.reinject_buf.push_back(ch);
4045 }
4046 self.reinject_buf.push_back('\n');
4047 self.reinject_resume_offset = Some(post_heredoc_offset);
4048
4049 HeredocRead {
4050 content,
4051 content_span: Span::from_positions(content_start, content_end),
4052 }
4053 }
4054
4055 fn heredoc_tail_line_join_stays_in_tail(&mut self) -> bool {
4056 let mut chars = self.cursor.rest().chars();
4057 if chars.next() != Some('\n') {
4058 return false;
4059 }
4060
4061 for ch in chars {
4062 if matches!(ch, ' ' | '\t') {
4063 continue;
4064 }
4065 if ch == '\n' {
4066 return false;
4067 }
4068 return matches!(ch, '|' | '&' | ';' | '<' | '>')
4069 || (ch == '#' && self.comments_enabled());
4070 }
4071
4072 false
4073 }
4074}
4075
/// Reports whether `line` terminates a here-document whose delimiter is `delimiter`.
///
/// With `strip_tabs`, leading tab characters are ignored first. The line matches when it starts
/// with the delimiter and anything after it consists solely of spaces and tabs.
fn heredoc_line_matches_delimiter(line: &str, delimiter: &str, strip_tabs: bool) -> bool {
    let candidate = if strip_tabs {
        line.trim_start_matches('\t')
    } else {
        line
    };

    candidate
        .strip_prefix(delimiter)
        .is_some_and(|trailing| trailing.bytes().all(|byte| matches!(byte, b' ' | b'\t')))
}
4093
/// Decides whether a `#` begins a comment given the character that came before it: yes when
/// there is no previous character, or when it is whitespace or one of `;`, `|`, `&`, `<`, `>`,
/// `)`.
fn heredoc_tail_hash_starts_comment(previous_tail_char: Option<char>) -> bool {
    match previous_tail_char {
        None => true,
        Some(prev) => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
    }
}
4099
/// Returns the character starting at byte `index` of `input` together with the byte index just
/// past it, or `None` when `index` is out of range, not on a character boundary, or at the end.
fn next_char_boundary(input: &str, index: usize) -> Option<(char, usize)> {
    input
        .get(index..)
        .and_then(|tail| tail.chars().next())
        .map(|ch| (ch, index + ch.len_utf8()))
}
4104
/// Reports whether `prefix` leaves at least one `((` open.
///
/// Adjacent `((` and `))` pairs are counted only outside single quotes, double quotes and
/// backticks, and only when not preceded by an active backslash escape. A `))` with nothing open
/// is ignored rather than driving the count negative.
fn line_has_unclosed_double_paren(prefix: &str) -> bool {
    let mut chars = prefix.chars().peekable();
    let mut open_groups = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_backtick = false;
    let mut pending_escape = false;

    while let Some(ch) = chars.next() {
        if ch == '\\' && !in_single {
            // Consecutive backslashes cancel each other out.
            pending_escape = !pending_escape;
            continue;
        }

        let escaped = pending_escape;
        pending_escape = false;
        if escaped {
            // An escaped character never toggles quoting or counts as a paren.
            continue;
        }

        let unquoted = !in_single && !in_double && !in_backtick;
        match ch {
            '\'' if !in_double && !in_backtick => in_single = !in_single,
            '"' if !in_single && !in_backtick => in_double = !in_double,
            '`' if !in_single && !in_double => in_backtick = !in_backtick,
            '(' if unquoted && chars.peek() == Some(&'(') => {
                chars.next();
                open_groups += 1;
            }
            ')' if unquoted && chars.peek() == Some(&')') => {
                chars.next();
                open_groups = open_groups.saturating_sub(1);
            }
            _ => {}
        }
    }

    open_groups > 0
}
4154
4155fn inside_unclosed_double_paren_on_line(input: &str, index: usize) -> bool {
4156 let line_start = input[..index].rfind('\n').map_or(0, |found| found + 1);
4157 let prefix = &input[line_start..index];
4158 line_has_unclosed_double_paren(prefix)
4159}
4160
4161fn hash_starts_comment(input: &str, index: usize) -> bool {
4162 if inside_unclosed_double_paren_on_line(input, index) {
4163 return false;
4164 }
4165
4166 let next = &input[index + '#'.len_utf8()..];
4167 input[..index]
4168 .chars()
4169 .next_back()
4170 .is_none_or(|prev| match prev {
4171 '(' => {
4172 let whitespace_index = next.find(char::is_whitespace);
4173 let close_index = next.find(')');
4174
4175 match (whitespace_index, close_index) {
4176 (Some(whitespace), Some(close)) => whitespace < close,
4177 (Some(_), None) | (None, None) => true,
4178 (None, Some(_)) => false,
4179 }
4180 }
4181 _ => prev.is_whitespace() || matches!(prev, ';' | '|' | '&' | '<' | '>' | ')'),
4182 })
4183}
4184
/// Reports whether `ch` ends a here-document delimiter word: it must be unquoted and unescaped,
/// and be either whitespace or one of `|`, `&`, `;`, `<`, `>`, `(`, `)`.
fn heredoc_delimiter_is_terminator(
    ch: char,
    in_single: bool,
    in_double: bool,
    escaped: bool,
) -> bool {
    if in_single || in_double || escaped {
        return false;
    }

    ch.is_whitespace() || matches!(ch, '|' | '&' | ';' | '<' | '>' | '(' | ')')
}
4196
4197fn scan_double_quoted_command_substitution_segment(
4198 input: &str,
4199 mut index: usize,
4200 subst_depth: usize,
4201) -> Option<usize> {
4202 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4203 match ch {
4204 '"' => return Some(next_index),
4205 '\\' => {
4206 index = next_index;
4207 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4208 index = escaped_next;
4209 }
4210 }
4211 '$' if input[next_index..].starts_with('{') => {
4212 let consumed = scan_command_subst_parameter_expansion_len(
4213 &input[next_index + '{'.len_utf8()..],
4214 subst_depth,
4215 0,
4216 )?;
4217 index = next_index + '{'.len_utf8() + consumed;
4218 }
4219 '$' if input[next_index..].starts_with('(')
4220 && !input[next_index + '('.len_utf8()..].starts_with('(') =>
4221 {
4222 let consumed = scan_command_substitution_body_len_inner(
4223 &input[next_index + '('.len_utf8()..],
4224 subst_depth + 1,
4225 )?;
4226 index = next_index + '('.len_utf8() + consumed;
4227 }
4228 _ => index = next_index,
4229 }
4230 }
4231
4232 None
4233}
4234
/// Scans the body of a `${…}` parameter expansion and returns the number of bytes up to and
/// including its closing `}`.
///
/// `input` starts just after the opening `${`. Quoting state (single quotes, `$'…'`, double
/// quotes, backticks) and backslash escapes are tracked so that only an unquoted, unescaped `}`
/// closes the expansion. Nested `${…}` recurse with `parameter_depth + 1`; nested `$(…)`, `<(…)`
/// and `>(…)` are skipped through `scan_command_substitution_body_len_inner`. When a nested scan
/// fails, the introducing character is simply treated as ordinary text.
///
/// Once `parameter_depth` reaches `MAX_PARAMETER_EXPANSION_SCAN_DEPTH` the work is delegated to
/// the non-recursive `scan_command_subst_parameter_expansion_len_balanced`.
///
/// Returns `None` when the input ends before the closing brace.
fn scan_command_subst_parameter_expansion_len(
    input: &str,
    subst_depth: usize,
    parameter_depth: usize,
) -> Option<usize> {
    if parameter_depth >= MAX_PARAMETER_EXPANSION_SCAN_DEPTH {
        return scan_command_subst_parameter_expansion_len_balanced(input, subst_depth);
    }

    let mut index = 0usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // Set right after an unquoted `$`, so that a directly following `'` opens a `$'…'` string.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Backslashes toggle the escape state everywhere except inside plain single quotes.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        // Nested expansions introduced by `$` (these are still recognized inside double quotes).
        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            if input[next_index..].starts_with('{')
                && let Some(consumed) = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                    parameter_depth + 1,
                )
            {
                index = next_index + '{'.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }

            // `$(` but not `$((`.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(` / `>(` are only recognized completely outside quotes.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            // The first unquoted, unescaped `}` closes this expansion.
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                return Some(next_index);
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4343
/// Non-recursive variant of `scan_command_subst_parameter_expansion_len`.
///
/// Instead of recursing for each nested `${`, it keeps a single `brace_depth` counter that
/// starts at 1 (for the `${` already consumed by the caller), is incremented for every `${` and
/// decremented for every unquoted, unescaped `}`. It returns the number of bytes up to and
/// including the `}` that brings the counter to zero, or `None` if the input ends first.
///
/// Nested `$(…)`, `<(…)` and `>(…)` are still skipped through
/// `scan_command_substitution_body_len_inner`; when such a scan fails the introducing character
/// is treated as ordinary text.
///
/// NOTE(review): `${` is counted even inside double quotes, while `}` is only counted outside
/// them, so an expansion nested inside a double-quoted string appears to leave the counter
/// unbalanced here — confirm whether that is intended for this fallback path.
fn scan_command_subst_parameter_expansion_len_balanced(
    input: &str,
    subst_depth: usize,
) -> Option<usize> {
    let mut index = 0usize;
    let mut brace_depth = 1usize;
    let mut in_single = false;
    let mut in_double = false;
    let mut in_ansi_c_single = false;
    let mut in_backtick = false;
    let mut escaped = false;
    // Set right after an unquoted `$`, so that a directly following `'` opens a `$'…'` string.
    let mut ansi_c_quote_pending = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        let was_escaped = escaped;
        // Backslashes toggle the escape state everywhere except inside plain single quotes.
        if ch == '\\' && !in_single {
            escaped = !escaped;
            index = next_index;
            ansi_c_quote_pending = false;
            continue;
        }
        escaped = false;

        if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped && ch == '$' {
            // A nested `${` only bumps the counter; its body is scanned by this same loop.
            if input[next_index..].starts_with('{') {
                brace_depth = brace_depth.saturating_add(1);
                index = next_index + '{'.len_utf8();
                ansi_c_quote_pending = false;
                continue;
            }

            // `$(` but not `$((`.
            if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(')
                && let Some(consumed) = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )
            {
                index = next_index + '('.len_utf8() + consumed;
                ansi_c_quote_pending = false;
                continue;
            }
        }

        // `<(` / `>(` are only recognized completely outside quotes.
        if !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped
            && matches!(ch, '<' | '>')
            && input[next_index..].starts_with('(')
            && let Some(consumed) = scan_command_substitution_body_len_inner(
                &input[next_index + '('.len_utf8()..],
                subst_depth + 1,
            )
        {
            index = next_index + '('.len_utf8() + consumed;
            ansi_c_quote_pending = false;
            continue;
        }

        match ch {
            '\'' if !in_double && !in_backtick && !was_escaped => {
                if in_ansi_c_single {
                    in_ansi_c_single = false;
                } else if !in_single && ansi_c_quote_pending {
                    in_ansi_c_single = true;
                } else {
                    in_single = !in_single;
                }
            }
            '"' if !in_single && !in_ansi_c_single && !in_backtick && !was_escaped => {
                in_double = !in_double
            }
            '`' if !in_single && !in_ansi_c_single && !in_double && !was_escaped => {
                in_backtick = !in_backtick
            }
            '}' if !in_single
                && !in_ansi_c_single
                && !in_double
                && !in_backtick
                && !was_escaped =>
            {
                brace_depth = brace_depth.saturating_sub(1);
                if brace_depth == 0 {
                    return Some(next_index);
                }
            }
            _ => {}
        }

        ansi_c_quote_pending = ch == '$'
            && !in_single
            && !in_ansi_c_single
            && !in_double
            && !in_backtick
            && !was_escaped;
        index = next_index;
    }

    None
}
4446
4447fn scan_command_subst_heredoc_delimiter(input: &str, mut index: usize) -> Option<(usize, String)> {
4448 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4449 if !matches!(ch, ' ' | '\t') {
4450 break;
4451 }
4452 index = next_index;
4453 }
4454
4455 let start = index;
4456 let mut cooked = String::new();
4457 let mut in_single = false;
4458 let mut in_double = false;
4459 let mut escaped = false;
4460
4461 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4462 if heredoc_delimiter_is_terminator(ch, in_single, in_double, escaped) {
4463 break;
4464 }
4465
4466 index = next_index;
4467 if escaped {
4468 cooked.push(ch);
4469 escaped = false;
4470 continue;
4471 }
4472
4473 match ch {
4474 '\\' if !in_single => escaped = true,
4475 '\'' if !in_double => in_single = !in_single,
4476 '"' if !in_single => in_double = !in_double,
4477 _ => cooked.push(ch),
4478 }
4479 }
4480
4481 (index > start).then_some((index, cooked))
4482}
4483
4484fn skip_command_subst_pending_heredoc(
4485 input: &str,
4486 mut index: usize,
4487 delimiter: &str,
4488 strip_tabs: bool,
4489) -> usize {
4490 while index <= input.len() {
4491 let rest = &input[index..];
4492 let line_len = rest.find('\n').unwrap_or(rest.len());
4493 let line = &rest[..line_len];
4494 let has_newline = line_len < rest.len();
4495
4496 index += line_len;
4497 if has_newline {
4498 index += '\n'.len_utf8();
4499 }
4500
4501 if heredoc_line_matches_delimiter(line, delimiter, strip_tabs) || !has_newline {
4502 return index;
4503 }
4504 }
4505
4506 index
4507}
4508
4509fn scan_command_subst_ansi_c_single_quoted_segment(
4510 input: &str,
4511 quote_index: usize,
4512) -> Option<usize> {
4513 let mut index = quote_index + '\''.len_utf8();
4514
4515 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4516 index = next_index;
4517 if ch == '\\' {
4518 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4519 index = escaped_next;
4520 }
4521 continue;
4522 }
4523
4524 if ch == '\'' {
4525 return Some(index);
4526 }
4527 }
4528
4529 None
4530}
4531
4532fn scan_command_subst_backtick_segment(input: &str, start: usize) -> Option<usize> {
4533 let mut index = start;
4534
4535 while let Some((ch, next_index)) = next_char_boundary(input, index) {
4536 index = next_index;
4537 if ch == '\\' {
4538 if let Some((_, escaped_next)) = next_char_boundary(input, index) {
4539 index = escaped_next;
4540 }
4541 continue;
4542 }
4543
4544 if ch == '`' {
4545 return Some(index);
4546 }
4547 }
4548
4549 None
4550}
4551
4552fn flush_scanned_command_subst_keyword(
4553 current_word: &mut String,
4554 pending_case_headers: &mut usize,
4555 case_clause_depths: &mut SmallVec<[usize; 4]>,
4556 depth: usize,
4557 word_started_at_command_start: &mut bool,
4558) {
4559 if current_word.is_empty() {
4560 *word_started_at_command_start = false;
4561 return;
4562 }
4563
4564 match current_word.as_str() {
4565 "case" if *word_started_at_command_start => *pending_case_headers += 1,
4566 "in" if *pending_case_headers > 0 => {
4567 *pending_case_headers -= 1;
4568 case_clause_depths.push(depth);
4569 }
4570 "esac" if *word_started_at_command_start => {
4571 case_clause_depths.pop();
4572 }
4573 _ => {}
4574 }
4575
4576 current_word.clear();
4577 *word_started_at_command_start = false;
4578}
4579
/// Scans the body of a `$(…)` command substitution and returns the number of bytes up to and
/// including the `)` that closes it.
///
/// `input` starts just after the opening `$(`. `subst_depth` is the current substitution
/// nesting level; once it reaches `DEFAULT_MAX_SUBST_DEPTH` the scan gives up with `None`.
///
/// The scanner keeps a parenthesis `depth` (starting at 1) and skips over constructs that may
/// contain parentheses which must not be counted: comments, single/double/`$'…'` quotes,
/// backticks, backslash escapes, `${…}`, nested `$(…)` and here-document bodies. It also tracks
/// `case … in … esac` so that a `)` ending a case pattern at the recorded depth is not treated as
/// a closing parenthesis.
///
/// Returns `None` when the input ends before the substitution is closed or when one of the
/// nested scanners fails.
pub(super) fn scan_command_substitution_body_len_inner(
    input: &str,
    subst_depth: usize,
) -> Option<usize> {
    if subst_depth >= DEFAULT_MAX_SUBST_DEPTH {
        return None;
    }

    let mut index = 0usize;
    let mut depth = 1;
    // Here-documents announced on the current line: (cooked delimiter, strip_tabs). Their bodies
    // are skipped once the line's newline is reached.
    let mut pending_heredocs = SmallVec::<[(String, bool); 2]>::new();
    // `case` keywords seen whose `in` has not been seen yet.
    let mut pending_case_headers = 0usize;
    // Paren depth recorded at each `in` of a currently open `case` clause.
    let mut case_clause_depths = SmallVec::<[usize; 4]>::new();
    // ASCII alphanumeric / `_` run currently being accumulated for keyword detection.
    let mut current_word = String::with_capacity(16);
    let mut at_command_start = true;
    let mut expecting_redirection_target = false;
    let mut current_word_started_at_command_start = false;

    while let Some((ch, next_index)) = next_char_boundary(input, index) {
        match ch {
            // Comment: skip to the end of the line, then skip any pending heredoc bodies.
            '#' if hash_starts_comment(input, index) => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((comment_ch, comment_next)) = next_char_boundary(input, index) {
                    index = comment_next;
                    if comment_ch == '\n' {
                        for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                            index = skip_command_subst_pending_heredoc(
                                input, index, &delimiter, strip_tabs,
                            );
                        }
                        at_command_start = true;
                        expecting_redirection_target = false;
                        break;
                    }
                }
            }
            '(' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                depth += 1;
                index = next_index;
                at_command_start = true;
                expecting_redirection_target = false;
            }
            ')' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                // Inside a `case` clause opened at this depth, `)` ends a pattern rather than
                // closing a parenthesis.
                if case_clause_depths
                    .last()
                    .is_some_and(|case_depth| *case_depth == depth)
                {
                    index = next_index;
                    at_command_start = true;
                    expecting_redirection_target = false;
                    continue;
                }
                depth -= 1;
                index = next_index;
                if depth == 0 {
                    // This `)` closes the substitution itself.
                    return Some(index);
                }
                at_command_start = false;
                expecting_redirection_target = false;
            }
            // Double-quoted string: skipped as a unit; fails the whole scan if unterminated.
            '"' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_double_quoted_command_substitution_segment(
                    input,
                    next_index,
                    subst_depth,
                )?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Single-quoted string: skipped up to the next `'` (or the end of input).
            '\'' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                while let Some((quoted_ch, quoted_next)) = next_char_boundary(input, index) {
                    index = quoted_next;
                    if quoted_ch == '\'' {
                        break;
                    }
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Backtick substitution: skipped as a unit; fails the whole scan if unterminated.
            '`' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_backtick_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // `$'…'` string; `next_index` is the position of its opening quote.
            '$' if input[next_index..].starts_with('\'') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = scan_command_subst_ansi_c_single_quoted_segment(input, next_index)?;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Backslash escape: skip the backslash and the character after it, if any.
            '\\' => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                index = next_index;
                if let Some((_, escaped_next)) = next_char_boundary(input, index) {
                    index = escaped_next;
                }
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Output redirection: the next word is its target.
            '>' => {
                // An all-digit word at command start directly before the operator is a file
                // descriptor number, so the command has not really started yet.
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                index = next_index;
                expecting_redirection_target = true;
            }
            // `<<` family: here-string (`<<<`), here-document (`<<`, `<<-`), or a shift operator
            // inside `(( … ))`.
            '<' if input[next_index..].starts_with('<') => {
                let word_was_redirection_fd = current_word_started_at_command_start
                    && !current_word.is_empty()
                    && current_word.chars().all(|current| current.is_ascii_digit());
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                if word_was_redirection_fd {
                    at_command_start = true;
                }
                // Within an unclosed `((` on this line, `<<` is not a redirection: skip both.
                if inside_unclosed_double_paren_on_line(input, index) {
                    index = next_index + '<'.len_utf8();
                    continue;
                }

                // `<<<`: the next word is the here-string's operand.
                if input[next_index + '<'.len_utf8()..].starts_with('<') {
                    index = next_index + '<'.len_utf8() + '<'.len_utf8();
                    expecting_redirection_target = true;
                    continue;
                }

                // `next_index` points at the second `<`; step over it (and the `-` of `<<-`).
                let strip_tabs = input[next_index..].starts_with("<-");
                let delimiter_start = next_index + if strip_tabs { 2 } else { 1 };
                if let Some((delimiter_index, delimiter)) =
                    scan_command_subst_heredoc_delimiter(input, delimiter_start)
                {
                    pending_heredocs.push((delimiter, strip_tabs));
                    index = delimiter_index;
                    expecting_redirection_target = false;
                } else {
                    index = next_index;
                    expecting_redirection_target = true;
                }
            }
            // End of line: skip the bodies of all here-documents announced on it.
            '\n' => {
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                index = next_index;
                for (delimiter, strip_tabs) in pending_heredocs.drain(..) {
                    index =
                        skip_command_subst_pending_heredoc(input, index, &delimiter, strip_tabs);
                }
                at_command_start = true;
                expecting_redirection_target = false;
            }
            // `${…}`: skipped as a unit; fails the whole scan if unterminated.
            '$' if input[next_index..].starts_with('{') => {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_subst_parameter_expansion_len(
                    &input[next_index + '{'.len_utf8()..],
                    subst_depth,
                    0,
                )?;
                index = next_index + '{'.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            // Nested `$(…)` (but not `$((`): recurse one substitution level deeper.
            '$' if input[next_index..].starts_with('(')
                && !input[next_index + '('.len_utf8()..].starts_with('(') =>
            {
                let had_word = !current_word.is_empty();
                flush_scanned_command_subst_keyword(
                    &mut current_word,
                    &mut pending_case_headers,
                    &mut case_clause_depths,
                    depth,
                    &mut current_word_started_at_command_start,
                );
                if had_word && expecting_redirection_target {
                    expecting_redirection_target = false;
                }
                let consumed = scan_command_substitution_body_len_inner(
                    &input[next_index + '('.len_utf8()..],
                    subst_depth + 1,
                )?;
                index = next_index + '('.len_utf8() + consumed;
                if expecting_redirection_target {
                    expecting_redirection_target = false;
                } else {
                    at_command_start = false;
                }
            }
            _ => {
                if ch.is_ascii_alphanumeric() || ch == '_' {
                    // Word characters accumulate for keyword detection; remember whether the
                    // word began in command position.
                    if current_word.is_empty() && !expecting_redirection_target && at_command_start
                    {
                        current_word_started_at_command_start = true;
                        at_command_start = false;
                    }
                    current_word.push(ch);
                } else {
                    let had_word = !current_word.is_empty();
                    flush_scanned_command_subst_keyword(
                        &mut current_word,
                        &mut pending_case_headers,
                        &mut case_clause_depths,
                        depth,
                        &mut current_word_started_at_command_start,
                    );
                    if had_word && expecting_redirection_target {
                        expecting_redirection_target = false;
                    }
                    match ch {
                        ' ' | '\t' => {}
                        // Command separators put the scanner back in command position.
                        ';' | '|' | '&' => {
                            at_command_start = true;
                            expecting_redirection_target = false;
                        }
                        _ => {
                            if !expecting_redirection_target {
                                at_command_start = false;
                            }
                        }
                    }
                }
                index = next_index;
            }
        }
    }

    None
}
4937
4938pub(super) fn scan_command_substitution_body_len(input: &str) -> Option<usize> {
4939 scan_command_substitution_body_len_inner(input, 0)
4940}
4941
4942#[cfg(test)]
4943mod tests {
4944 use super::*;
4945
4946 fn token_text(token: &LexedToken<'_>, source: &str) -> Option<String> {
4947 match token.kind {
4948 kind if kind.is_word_like() => token.word_string(),
4949 TokenKind::Comment => token
4950 .span
4951 .slice(source)
4952 .strip_prefix('#')
4953 .map(str::to_string),
4954 TokenKind::Error => token
4955 .error_kind()
4956 .map(LexerErrorKind::message)
4957 .map(str::to_string),
4958 _ => None,
4959 }
4960 }
4961
4962 fn assert_next_token(
4963 lexer: &mut Lexer<'_>,
4964 expected_kind: TokenKind,
4965 expected_text: Option<&str>,
4966 ) {
4967 let token = lexer.next_lexed_token().unwrap();
4968 assert_eq!(token.kind, expected_kind);
4969 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4970 }
4971
4972 fn assert_next_token_with_comments(
4973 lexer: &mut Lexer<'_>,
4974 expected_kind: TokenKind,
4975 expected_text: Option<&str>,
4976 ) {
4977 let token = lexer.next_lexed_token_with_comments().unwrap();
4978 assert_eq!(token.kind, expected_kind);
4979 assert_eq!(token_text(&token, lexer.input).as_deref(), expected_text);
4980 }
4981
4982 fn assert_non_newline_tokens_stay_on_one_line(input: &str) {
4983 let mut lexer = Lexer::new(input);
4984
4985 while let Some(token) = lexer.next_lexed_token() {
4986 if token.kind == TokenKind::Newline {
4987 continue;
4988 }
4989
4990 assert_eq!(
4991 token.span.start.line, token.span.end.line,
4992 "token should stay on one line: {:?}",
4993 token
4994 );
4995 }
4996 }
4997
4998 #[test]
4999 fn test_simple_words() {
5000 let mut lexer = Lexer::new("echo hello world");
5001
5002 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5003 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5004 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5005 assert!(lexer.next_lexed_token().is_none());
5006 }
5007
5008 #[test]
5009 fn test_single_quoted_string() {
5010 let mut lexer = Lexer::new("echo 'hello world'");
5011
5012 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5013 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("hello world"));
5015 assert!(lexer.next_lexed_token().is_none());
5016 }
5017
5018 #[test]
5019 fn test_double_quoted_string() {
5020 let mut lexer = Lexer::new("echo \"hello world\"");
5021
5022 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5023 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("hello world"));
5024 assert!(lexer.next_lexed_token().is_none());
5025 }
5026
5027 #[test]
5028 fn test_brace_expansion_token_ignores_quoted_closers() {
5029 let mut lexer = Lexer::new("echo {\"}\",a}\n");
5030
5031 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5032 assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{"}",a}"#));
5033 assert_next_token(&mut lexer, TokenKind::Newline, None);
5034 assert!(lexer.next_lexed_token().is_none());
5035 }
5036
5037 #[test]
5038 fn test_brace_expansion_token_preserves_single_quoted_backslash_member_boundary() {
5039 let mut lexer = Lexer::new("echo {'a\\',b} next\n");
5040
5041 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5042 assert_next_token(&mut lexer, TokenKind::Word, Some(r#"{'a\',b}"#));
5043 assert_next_token(&mut lexer, TokenKind::Word, Some("next"));
5044 assert_next_token(&mut lexer, TokenKind::Newline, None);
5045 assert!(lexer.next_lexed_token().is_none());
5046 }
5047
5048 #[test]
5049 fn test_double_quoted_expansion_token_keeps_source_backing() {
5050 let source = r#""$bar""#;
5051 let mut lexer = Lexer::new(source);
5052
5053 let token = lexer.next_lexed_token().unwrap();
5054 assert_eq!(token.kind, TokenKind::QuotedWord);
5055 assert_eq!(token.word_text(), Some("$bar"));
5056
5057 let word = token.word().unwrap();
5058 let segment = word.single_segment().unwrap();
5059 assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5060 assert_eq!(segment.span().unwrap().slice(source), "$bar");
5061 }
5062
5063 #[test]
5064 fn test_double_quoted_token_preserves_inner_quoted_command_substitution_pipeline() {
5065 let source = r#""$(echo "$line" | cut -d' ' -f2-)""#;
5066 let mut lexer = Lexer::new(source);
5067
5068 let token = lexer.next_lexed_token().unwrap();
5069 assert_eq!(token.kind, TokenKind::QuotedWord);
5070 assert_eq!(
5071 token.word_text(),
5072 Some(r#"$(echo "$line" | cut -d' ' -f2-)"#)
5073 );
5074 }
5075
5076 #[test]
5077 fn test_double_quoted_token_preserves_braced_param_pipeline_substitution() {
5078 let source = r#""$(echo "${@}" | tr -d '[:space:]')""#;
5079 let mut lexer = Lexer::new(source);
5080
5081 let token = lexer.next_lexed_token().unwrap();
5082 assert_eq!(token.kind, TokenKind::QuotedWord);
5083 assert_eq!(
5084 token.word_text(),
5085 Some(r#"$(echo "${@}" | tr -d '[:space:]')"#)
5086 );
5087 }
5088
5089 #[test]
5090 fn test_deep_command_substitution_preserves_simple_parameter_expansion() {
5091 let source = r#""$(echo "$(echo "$(echo "$(echo "${name}")")")")""#;
5092 let mut lexer = Lexer::new(source);
5093
5094 let token = lexer.next_lexed_token().unwrap();
5095 assert_eq!(token.kind, TokenKind::QuotedWord);
5096 assert_eq!(
5097 token.word_text(),
5098 Some(r#"$(echo "$(echo "$(echo "$(echo "${name}")")")")"#)
5099 );
5100 }
5101
5102 #[test]
5103 fn test_command_substitution_preserves_deep_parameter_operand_paren() {
5104 let source = r#""$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")""#;
5105 let mut lexer = Lexer::new(source);
5106
5107 let token = lexer.next_lexed_token().unwrap();
5108 assert_eq!(token.kind, TokenKind::QuotedWord);
5109 assert_eq!(
5110 token.word_text(),
5111 Some(r#"$(echo "${a:-${b:-${c:-${d:-${e:-x})}}}}")"#)
5112 );
5113 }
5114
5115 #[test]
5116 fn test_mixed_word_keeps_segment_kinds() {
5117 let source = r#"foo"bar"'baz'"#;
5118 let mut lexer = Lexer::new(source);
5119
5120 let token = lexer.next_lexed_token().unwrap();
5121 assert_eq!(token.kind, TokenKind::Word);
5122
5123 let word = token.word().unwrap();
5124 let segments: Vec<_> = word
5125 .segments()
5126 .map(|segment| (segment.kind(), segment.as_str().to_string()))
5127 .collect();
5128
5129 assert_eq!(
5130 segments,
5131 vec![
5132 (LexedWordSegmentKind::Plain, "foo".to_string()),
5133 (LexedWordSegmentKind::DoubleQuoted, "bar".to_string()),
5134 (LexedWordSegmentKind::SingleQuoted, "baz".to_string()),
5135 ]
5136 );
5137 assert_eq!(word.joined_text(), "foobarbaz");
5138 assert_eq!(
5139 word.segments()
5140 .next()
5141 .and_then(LexedWordSegment::span)
5142 .unwrap()
5143 .slice(source),
5144 "foo"
5145 );
5146 }
5147
5148 #[test]
5149 fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc() {
5150 let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)\"";
5151
5152 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5153 let body = &source[..consumed];
5154
5155 assert!(body.contains("field, direction"));
5156 assert!(body.ends_with(')'));
5157 }
5158
5159 #[test]
5160 fn test_scan_command_substitution_body_len_handles_separator_started_comment() {
5161 let source = "printf '%s' x;# comment with ) and ,\nprintf '%s' y\n)\"";
5162
5163 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5164 let body = &source[..consumed];
5165
5166 assert!(body.contains("printf '%s' y"));
5167 assert!(body.ends_with(')'));
5168 }
5169
5170 #[test]
5171 fn test_scan_command_substitution_body_len_handles_grouping_comment_after_left_paren() {
5172 let source = " (# comment with )\nprintf %s 1,2\n) )\"";
5173
5174 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5175 let body = &source[..consumed];
5176
5177 assert!(body.contains("printf %s 1,2"));
5178 assert!(body.ends_with(')'));
5179 }
5180
5181 #[test]
5182 fn test_scan_command_substitution_body_len_handles_piped_heredoc_delimiter_without_space() {
5183 let source = "\ncat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)\"";
5184
5185 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5186 let body = &source[..consumed];
5187
5188 assert!(body.contains("field, direction"));
5189 assert!(body.ends_with(')'));
5190 }
5191
5192 #[test]
5193 fn test_scan_command_substitution_body_len_handles_parameter_expansion_with_right_paren() {
5194 let source = "printf %s ${x//foo/)},1)\"";
5195
5196 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5197 let body = &source[..consumed];
5198
5199 assert!(body.contains("${x//foo/)},1"));
5200 assert!(body.ends_with(')'));
5201 }
5202
5203 #[test]
5204 fn test_scan_command_substitution_body_len_handles_case_pattern_comment_after_right_paren() {
5205 let source = "case $kind in\na)# comment with esac )\nprintf %s 1,2 ;;\nesac\n)\"";
5206
5207 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5208 let body = &source[..consumed];
5209
5210 assert!(body.contains("printf %s 1,2"));
5211 assert!(body.ends_with(')'));
5212 }
5213
5214 #[test]
5215 fn test_hash_starts_comment_ignores_zsh_inline_glob_controls_after_left_paren() {
5216 let source = "[[ \"$buf\" == (#b)(*) ]]";
5217 let index = source.find('#').expect("expected hash");
5218
5219 assert!(!hash_starts_comment(source, index));
5220 }
5221
5222 #[test]
5223 fn test_hash_starts_comment_allows_grouped_comments_without_space_after_hash() {
5224 let source = "(#comment with )";
5225 let index = source.find('#').expect("expected hash");
5226
5227 assert!(hash_starts_comment(source, index));
5228 }
5229
5230 #[test]
5231 fn test_hash_starts_comment_ignores_hash_inside_unclosed_double_parens() {
5232 let source = "(( #c < 256 ))";
5233 let index = source.find('#').expect("expected hash");
5234
5235 assert!(!hash_starts_comment(source, index));
5236 }
5237
5238 #[test]
5239 fn test_hash_starts_comment_respects_quoted_double_parens() {
5240 let source = "printf '((' # comment";
5241 let index = source.find('#').expect("expected hash");
5242
5243 assert!(hash_starts_comment(source, index));
5244 }
5245
5246 #[test]
5247 fn test_scan_command_substitution_body_len_handles_quoted_double_parens_before_comments() {
5248 let source = "printf '((' # comment with )\nprintf %s 1,2\n)\"";
5249
5250 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5251 let body = &source[..consumed];
5252
5253 assert!(body.contains("printf %s 1,2"));
5254 assert!(body.ends_with(')'));
5255 }
5256
5257 #[test]
5258 fn test_scan_command_substitution_body_len_handles_grouped_comments_without_space_after_hash() {
5259 let source = " (#comment with )\nprintf %s 1,2\n) )\"";
5260
5261 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5262 let body = &source[..consumed];
5263
5264 assert!(body.contains("printf %s 1,2"));
5265 assert!(body.ends_with(')'));
5266 }
5267
5268 #[test]
5269 fn test_scan_command_substitution_body_len_ignores_arithmetic_shift_for_heredoc_detection() {
5270 let source = "((x<<2))\nprintf %s 1,2\n)\"";
5271
5272 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5273 let body = &source[..consumed];
5274
5275 assert!(body.contains("printf %s 1,2"));
5276 assert!(body.ends_with(')'));
5277 }
5278
5279 #[test]
5280 fn test_scan_command_substitution_body_len_handles_nested_case_pattern_right_paren() {
5281 let source = "(case $kind in\na) printf %s 1,2 ;;\nesac\n))\"";
5282
5283 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5284 let body = &source[..consumed];
5285
5286 assert!(body.contains("printf %s 1,2"));
5287 assert!(body.ends_with("))"));
5288 }
5289
5290 #[test]
5291 fn test_scan_command_substitution_body_len_ignores_plain_case_words_in_commands() {
5292 let source = "printf %s 1,2; echo case in)\"";
5293
5294 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5295 let body = &source[..consumed];
5296
5297 assert!(body.contains("echo case in"));
5298 assert!(body.ends_with(')'));
5299 }
5300
5301 #[test]
5302 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_with_escaped_single_quotes() {
5303 let source = "printf %s $'a\\'b'; printf %s 1,2)\"";
5304
5305 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5306 let body = &source[..consumed];
5307
5308 assert!(body.contains("$'a\\'b'"));
5309 assert!(body.contains("printf %s 1,2"));
5310 assert!(body.ends_with(')'));
5311 }
5312
5313 #[test]
5314 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens() {
5315 let source = "printf %s `echo foo)`; printf %s ok)\"";
5316
5317 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5318 let body = &source[..consumed];
5319
5320 assert!(body.contains("`echo foo)`"));
5321 assert!(body.contains("printf %s ok"));
5322 assert!(body.ends_with(')'));
5323 }
5324
5325 #[test]
5326 fn test_scan_command_substitution_body_len_handles_backticks_inside_parameter_expansions() {
5327 let source = "printf %s ${x/`echo }`/foo)},1)\"";
5328
5329 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5330 let body = &source[..consumed];
5331
5332 assert!(body.contains("${x/`echo }`/foo)},1"));
5333 assert!(body.ends_with(')'));
5334 }
5335
5336 #[test]
5337 fn test_scan_command_substitution_body_len_handles_process_substitutions_inside_parameter_expansions()
5338 {
5339 let source = "printf %s ${x/<(echo })/foo)},1)\"";
5340
5341 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5342 let body = &source[..consumed];
5343
5344 assert!(body.contains("${x/<(echo })/foo)},1"));
5345 assert!(body.ends_with(')'));
5346 }
5347
5348 #[test]
5349 fn test_scan_command_substitution_body_len_handles_plain_case_words_at_eof() {
5350 let source = "printf %s 1,2; echo case in)";
5351
5352 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5353 let body = &source[..consumed];
5354
5355 assert_eq!(body, source);
5356 }
5357
5358 #[test]
5359 fn test_scan_command_substitution_body_len_handles_ansi_c_quotes_at_eof() {
5360 let source = "printf %s $'a\\'b'; printf %s 1,2)";
5361
5362 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5363 let body = &source[..consumed];
5364
5365 assert_eq!(body, source);
5366 }
5367
5368 #[test]
5369 fn test_scan_command_substitution_body_len_handles_backticks_with_right_parens_at_eof() {
5370 let source = "printf %s `echo foo)`; printf %s ok)";
5371
5372 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5373 let body = &source[..consumed];
5374
5375 assert_eq!(body, source);
5376 }
5377
5378 #[test]
5379 fn test_scan_command_substitution_body_len_handles_inner_quotes_in_pipeline_at_eof() {
5380 let source = "echo \"$line\" | cut -d' ' -f2-)";
5381
5382 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5383 let body = &source[..consumed];
5384
5385 assert_eq!(body, source);
5386 }
5387
5388 #[test]
5389 fn test_scan_command_substitution_body_len_handles_braced_params_in_pipeline_at_eof() {
5390 let source = "echo \"${@}\" | tr -d '[:space:]')";
5391
5392 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5393 let body = &source[..consumed];
5394
5395 assert_eq!(body, source);
5396 }
5397
5398 #[test]
5399 fn test_scan_command_substitution_body_len_handles_tabstripped_heredoc_at_eof() {
5400 let source = "\n\t\t\tcat <<-EOF | tr '\\n' ' '\n\t\t\t\t{\"query\":\"field, direction\"}\n\t\t\tEOF\n\t\t)";
5401
5402 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5403 let body = &source[..consumed];
5404
5405 assert_eq!(body, source);
5406 }
5407
5408 #[test]
5409 fn test_scan_command_substitution_body_len_handles_piped_heredoc_at_eof() {
5410 let source = "cat <<EOF|tr '\\n' ' '\n{\"query\":\"field, direction\"}\nEOF\n)";
5411
5412 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5413 let body = &source[..consumed];
5414
5415 assert_eq!(body, source);
5416 }
5417
5418 #[test]
5419 fn test_lexer_handles_quoted_right_paren_inside_command_substitution_nested_in_arithmetic() {
5420 let source = "echo \"$(echo \"$(( $(printf ')') + 1 ))\")\"";
5421 let mut lexer = Lexer::new(source);
5422
5423 let first = lexer.next_lexed_token().expect("expected first token");
5424 assert!(first.kind.is_word_like(), "{:?}", first.kind);
5425 assert_eq!(first.word_string().as_deref(), Some("echo"));
5426
5427 let second = lexer.next_lexed_token().expect("expected second token");
5428 assert!(second.kind.is_word_like(), "{:?}", second.kind);
5429 assert_eq!(
5430 second.word_string().as_deref(),
5431 Some("$(echo \"$(( $(printf ')') + 1 ))\")")
5432 );
5433 }
5434
5435 #[test]
5436 fn test_scan_command_substitution_body_len_handles_escaped_quotes_before_substitution_tail() {
5437 let source = "echo -n \"\\\"adp_$(echo $var | tr A-Z a-z)\\\": [\"";
5438 let start = source.find("$(").expect("expected command substitution") + 2;
5439 let consumed =
5440 scan_command_substitution_body_len(&source[start..]).expect("expected match");
5441 assert_eq!(&source[start..start + consumed], "echo $var | tr A-Z a-z)");
5442 }
5443
5444 #[test]
5445 fn test_scan_command_substitution_body_len_keeps_nested_command_names() {
5446 let source = "echo $(echo $(basename $filename .fuzz))";
5447 let start = source.find("$(").expect("expected command substitution") + 2;
5448 let consumed =
5449 scan_command_substitution_body_len(&source[start..]).expect("expected match");
5450 assert_eq!(
5451 &source[start..start + consumed],
5452 "echo $(basename $filename .fuzz))"
5453 );
5454 }
5455
5456 #[test]
5457 fn test_scan_command_substitution_body_len_keeps_quoted_nested_control_command() {
5458 let source = "\n [[ \"$config_file\" == *\"$theme.cfg\" ]] && echo \"$(basename \"$config_file\")\"\n )";
5459 let consumed = scan_command_substitution_body_len(source).expect("expected match");
5460 assert_eq!(consumed, source.len());
5461 }
5462
5463 #[test]
5464 fn test_single_quoted_prefix_keeps_plain_continuation_segment() {
5465 let source = "'foo'bar";
5466 let mut lexer = Lexer::new(source);
5467
5468 let token = lexer.next_lexed_token().unwrap();
5469 assert_eq!(token.kind, TokenKind::LiteralWord);
5470
5471 let word = token.word().unwrap();
5472 let segments: Vec<_> = word
5473 .segments()
5474 .map(|segment| (segment.kind(), segment.as_str().to_string()))
5475 .collect();
5476
5477 assert_eq!(
5478 segments,
5479 vec![
5480 (LexedWordSegmentKind::SingleQuoted, "foo".to_string()),
5481 (LexedWordSegmentKind::Plain, "bar".to_string()),
5482 ]
5483 );
5484 assert_eq!(word.joined_text(), "foobar");
5485 assert_eq!(
5486 word.segments()
5487 .nth(1)
5488 .and_then(LexedWordSegment::span)
5489 .unwrap()
5490 .slice(source),
5491 "bar"
5492 );
5493 }
5494
5495 #[test]
5496 fn test_unquoted_command_substitution_word_keeps_source_backing() {
5497 let source = "$(printf hi)";
5498 let mut lexer = Lexer::new(source);
5499
5500 let token = lexer.next_lexed_token().unwrap();
5501 assert_eq!(token.kind, TokenKind::Word);
5502
5503 let word = token.word().unwrap();
5504 let segment = word.single_segment().unwrap();
5505 assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5506 assert_eq!(segment.as_str(), source);
5507 assert_eq!(segment.span().unwrap().slice(source), source);
5508 }
5509
5510 #[test]
5511 fn test_unquoted_nested_param_expansion_word_keeps_source_backing() {
5512 let source = "${arr[$RANDOM % ${#arr[@]}]}";
5513 let mut lexer = Lexer::new(source);
5514
5515 let token = lexer.next_lexed_token().unwrap();
5516 assert_eq!(token.kind, TokenKind::Word);
5517
5518 let word = token.word().unwrap();
5519 let segment = word.single_segment().unwrap();
5520 assert_eq!(segment.kind(), LexedWordSegmentKind::Plain);
5521 assert_eq!(segment.as_str(), source);
5522 assert_eq!(segment.span().unwrap().slice(source), source);
5523 }
5524
5525 #[test]
5526 fn test_quoted_prefix_with_command_substitution_continuation_keeps_source_backing() {
5527 let source = "\"foo\"$(printf hi)";
5528 let mut lexer = Lexer::new(source);
5529
5530 let token = lexer.next_lexed_token().unwrap();
5531 assert_eq!(token.kind, TokenKind::Word);
5532
5533 let word = token.word().unwrap();
5534 let continuation = word.segments().nth(1).unwrap();
5535 assert_eq!(continuation.kind(), LexedWordSegmentKind::Plain);
5536 assert_eq!(continuation.as_str(), "$(printf hi)");
5537 assert_eq!(continuation.span().unwrap().slice(source), "$(printf hi)");
5538 }
5539
5540 #[test]
5541 fn test_double_quoted_nested_param_expansion_keeps_source_backing() {
5542 let source = r#""${arr[$RANDOM % ${#arr[@]}]}""#;
5543 let mut lexer = Lexer::new(source);
5544
5545 let token = lexer.next_lexed_token().unwrap();
5546 assert_eq!(token.kind, TokenKind::QuotedWord);
5547
5548 let word = token.word().unwrap();
5549 let segment = word.single_segment().unwrap();
5550 assert_eq!(segment.kind(), LexedWordSegmentKind::DoubleQuoted);
5551 assert_eq!(segment.as_str(), "${arr[$RANDOM % ${#arr[@]}]}");
5552 assert_eq!(
5553 segment.span().unwrap().slice(source),
5554 "${arr[$RANDOM % ${#arr[@]}]}"
5555 );
5556 }
5557
5558 #[test]
5559 fn test_ansi_c_control_escape_can_consume_quote() {
5560 let mut lexer = Lexer::new("echo $'\\c''");
5561
5562 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5563 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("\x07"));
5564 assert!(lexer.next_lexed_token().is_none());
5565 }
5566
5567 #[test]
5568 fn test_parameter_expansion_replacing_double_quote_stays_on_one_line() {
5569 let source = r#"out_line="${out_line//'"'/'\"'}"
5570"#;
5571 let mut lexer = Lexer::new(source);
5572
5573 assert_next_token(
5574 &mut lexer,
5575 TokenKind::Word,
5576 Some(r#"out_line=${out_line//'"'/'"'}"#),
5577 );
5578 assert_next_token(&mut lexer, TokenKind::Newline, None);
5579 assert!(lexer.next_lexed_token().is_none());
5580 }
5581
5582 #[test]
5583 fn test_parameter_expansion_replacing_double_quote_does_not_swallow_following_commands() {
5584 let source = r#"out_line="${out_line//'"'/'\"'}"
5585echo "Error: Missing python3!"
5586cat << 'EOF' > "${pywrapper}"
5587import os
5588EOF
5589"#;
5590 let mut lexer = Lexer::new(source);
5591
5592 assert_next_token(
5593 &mut lexer,
5594 TokenKind::Word,
5595 Some(r#"out_line=${out_line//'"'/'"'}"#),
5596 );
5597 assert_next_token(&mut lexer, TokenKind::Newline, None);
5598 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5599 assert_next_token(
5600 &mut lexer,
5601 TokenKind::QuotedWord,
5602 Some("Error: Missing python3!"),
5603 );
5604 assert_next_token(&mut lexer, TokenKind::Newline, None);
5605 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5606 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5607 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("EOF"));
5608 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5609 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("${pywrapper}"));
5610 }
5611
5612 #[test]
5613 fn test_parameter_expansion_replacement_with_escaped_backslashes_stays_single_token() {
5614 let source = "crypt=${crypt//\\\\/\\\\\\\\}\n";
5615 let mut lexer = Lexer::new(source);
5616
5617 let token = lexer.next_lexed_token().unwrap();
5618 assert_eq!(token.kind, TokenKind::Word);
5619 assert_eq!(token.span.slice(source), "crypt=${crypt//\\\\/\\\\\\\\}");
5620 assert!(token.source_slice(source).is_none());
5621 assert_eq!(
5622 token.word_string().as_deref(),
5623 Some("crypt=${crypt//\\/\\\\}")
5624 );
5625 assert_next_token(&mut lexer, TokenKind::Newline, None);
5626 assert!(lexer.next_lexed_token().is_none());
5627 }
5628
5629 #[test]
5630 fn test_trim_pattern_with_literal_left_brace_does_not_swallow_following_tokens() {
5631 let source = "dns_servercow_info='ServerCow.de\nSite: ServerCow.de\n'\n\nf(){\n if true; then\n txtvalue_old=${response#*{\\\"name\\\":\\\"\"$_sub_domain\"\\\",\\\"ttl\\\":20,\\\"type\\\":\\\"TXT\\\",\\\"content\\\":\\\"}\n fi\n}\n";
5632 let mut lexer = Lexer::new(source);
5633
5634 assert_next_token(
5635 &mut lexer,
5636 TokenKind::Word,
5637 Some("dns_servercow_info=ServerCow.de\nSite: ServerCow.de\n"),
5638 );
5639 assert_next_token(&mut lexer, TokenKind::Newline, None);
5640 assert_next_token(&mut lexer, TokenKind::Newline, None);
5641 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5642 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5643 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5644 assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5645 assert_next_token(&mut lexer, TokenKind::Newline, None);
5646 assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5647 assert_next_token(&mut lexer, TokenKind::Word, Some("true"));
5648 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5649 assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5650 assert_next_token(&mut lexer, TokenKind::Newline, None);
5651 assert_next_token(
5652 &mut lexer,
5653 TokenKind::Word,
5654 Some(
5655 "txtvalue_old=${response#*{\"name\":\"\"$_sub_domain\"\",\"ttl\":20,\"type\":\"TXT\",\"content\":\"}",
5656 ),
5657 );
5658 assert_next_token(&mut lexer, TokenKind::Newline, None);
5659 assert_next_token(&mut lexer, TokenKind::Word, Some("fi"));
5660 assert_next_token(&mut lexer, TokenKind::Newline, None);
5661 assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5662 assert_next_token(&mut lexer, TokenKind::Newline, None);
5663 assert!(lexer.next_lexed_token().is_none());
5664 }
5665
5666 #[test]
5667 fn test_case_pattern_literal_left_brace_does_not_swallow_following_arms() {
5668 let source = "case \"$word\" in\n {) : ;;\n :) : ;;\nesac\n";
5669 let mut lexer = Lexer::new(source);
5670
5671 assert_next_token(&mut lexer, TokenKind::Word, Some("case"));
5672 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$word"));
5673 assert_next_token(&mut lexer, TokenKind::Word, Some("in"));
5674 assert_next_token(&mut lexer, TokenKind::Newline, None);
5675 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
5676 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5677 assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5678 assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5679 assert_next_token(&mut lexer, TokenKind::Newline, None);
5680 assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5681 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5682 assert_next_token(&mut lexer, TokenKind::Word, Some(":"));
5683 assert_next_token(&mut lexer, TokenKind::DoubleSemicolon, None);
5684 assert_next_token(&mut lexer, TokenKind::Newline, None);
5685 assert_next_token(&mut lexer, TokenKind::Word, Some("esac"));
5686 assert_next_token(&mut lexer, TokenKind::Newline, None);
5687 assert!(lexer.next_lexed_token().is_none());
5688 }
5689
5690 #[test]
5691 fn test_conditional_regex_literal_left_brace_keeps_closing_tokens() {
5692 let source = "if [[ $MOTD ]] && ! [[ $MOTD =~ ^{ ]]; then\n";
5693 let mut lexer = Lexer::new(source);
5694
5695 assert_next_token(&mut lexer, TokenKind::Word, Some("if"));
5696 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5697 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5698 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5699 assert_next_token(&mut lexer, TokenKind::And, None);
5700 assert_next_token(&mut lexer, TokenKind::Word, Some("!"));
5701 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5702 assert_next_token(&mut lexer, TokenKind::Word, Some("$MOTD"));
5703 assert_next_token(&mut lexer, TokenKind::Word, Some("=~"));
5704 assert_next_token(&mut lexer, TokenKind::Word, Some("^{"));
5705 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5706 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5707 assert_next_token(&mut lexer, TokenKind::Word, Some("then"));
5708 assert_next_token(&mut lexer, TokenKind::Newline, None);
5709 assert!(lexer.next_lexed_token().is_none());
5710 }
5711
5712 #[test]
5713 fn test_midword_brace_expansion_with_command_substitution_stays_single_word() {
5714 let source = "echo -{$(echo a),b}-\n";
5715 let mut lexer = Lexer::new(source);
5716
5717 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5718 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$(echo a),b}-"));
5719 assert_next_token(&mut lexer, TokenKind::Newline, None);
5720 assert!(lexer.next_lexed_token().is_none());
5721 }
5722
5723 #[test]
5724 fn test_midword_brace_expansion_with_arithmetic_substitution_stays_single_word() {
5725 let source = "echo -{$((1 + 2)),b}-\n";
5726 let mut lexer = Lexer::new(source);
5727
5728 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5729 assert_next_token(&mut lexer, TokenKind::Word, Some("-{$((1 + 2)),b}-"));
5730 assert_next_token(&mut lexer, TokenKind::Newline, None);
5731 assert!(lexer.next_lexed_token().is_none());
5732 }
5733
5734 #[test]
5735 fn test_operators() {
5736 let mut lexer = Lexer::new("a |& b | c && d || e; f &");
5737
5738 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5739 assert_next_token(&mut lexer, TokenKind::PipeBoth, None);
5740 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5741 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5742 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5743 assert_next_token(&mut lexer, TokenKind::And, None);
5744 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5745 assert_next_token(&mut lexer, TokenKind::Or, None);
5746 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5747 assert_next_token(&mut lexer, TokenKind::Semicolon, None);
5748 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5749 assert_next_token(&mut lexer, TokenKind::Background, None);
5750 assert!(lexer.next_lexed_token().is_none());
5751 }
5752
5753 #[test]
5754 fn test_double_left_bracket_requires_separator() {
5755 let mut lexer = Lexer::new("[[ foo ]]\n[[z]\n");
5756
5757 assert_next_token(&mut lexer, TokenKind::DoubleLeftBracket, None);
5758 assert_next_token(&mut lexer, TokenKind::Word, Some("foo"));
5759 assert_next_token(&mut lexer, TokenKind::DoubleRightBracket, None);
5760 assert_next_token(&mut lexer, TokenKind::Newline, None);
5761 assert_next_token(&mut lexer, TokenKind::Word, Some("[[z]"));
5762 assert_next_token(&mut lexer, TokenKind::Newline, None);
5763 assert!(lexer.next_lexed_token().is_none());
5764 }
5765
5766 #[test]
5767 fn test_redirects() {
5768 let mut lexer = Lexer::new("a > b >> c >>| d 2>>| e 2>| f < g << h <<< i &>> j <> k");
5769
5770 assert_next_token(&mut lexer, TokenKind::Word, Some("a"));
5771 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5772 assert_next_token(&mut lexer, TokenKind::Word, Some("b"));
5773 assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5774 assert_next_token(&mut lexer, TokenKind::Word, Some("c"));
5775 assert_next_token(&mut lexer, TokenKind::RedirectAppend, None);
5776 assert_next_token(&mut lexer, TokenKind::Word, Some("d"));
5777 assert_next_token(&mut lexer, TokenKind::RedirectFdAppend, None);
5778 assert_next_token(&mut lexer, TokenKind::Word, Some("e"));
5779 let token = lexer.next_lexed_token().unwrap();
5780 assert_eq!(token.kind, TokenKind::Clobber);
5781 assert_eq!(token.fd_value(), Some(2));
5782 assert_eq!(token_text(&token, lexer.input), None);
5783 assert_next_token(&mut lexer, TokenKind::Word, Some("f"));
5784 assert_next_token(&mut lexer, TokenKind::RedirectIn, None);
5785 assert_next_token(&mut lexer, TokenKind::Word, Some("g"));
5786 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5787 assert_next_token(&mut lexer, TokenKind::Word, Some("h"));
5788 assert_next_token(&mut lexer, TokenKind::HereString, None);
5789 assert_next_token(&mut lexer, TokenKind::Word, Some("i"));
5790 assert_next_token(&mut lexer, TokenKind::RedirectBothAppend, None);
5791 assert_next_token(&mut lexer, TokenKind::Word, Some("j"));
5792 assert_next_token(&mut lexer, TokenKind::RedirectReadWrite, None);
5793 assert_next_token(&mut lexer, TokenKind::Word, Some("k"));
5794 }
5795
5796 #[test]
5797 fn test_comment() {
5798 let mut lexer = Lexer::new("echo hello # this is a comment\necho world");
5799
5800 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5801 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5802 assert_next_token(&mut lexer, TokenKind::Newline, None);
5803 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5804 assert_next_token(&mut lexer, TokenKind::Word, Some("world"));
5805 }
5806
5807 #[test]
5808 fn test_comment_token_with_span() {
5809 let mut lexer = Lexer::new("# lead\necho hi # tail");
5810
5811 let comment = lexer.next_lexed_token_with_comments().unwrap();
5812 assert_eq!(comment.kind, TokenKind::Comment);
5813 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" lead"));
5814 assert_eq!(comment.span.start.line, 1);
5815 assert_eq!(comment.span.start.column, 1);
5816 assert_eq!(comment.span.end.line, 1);
5817 assert_eq!(comment.span.end.column, 7);
5818
5819 assert_next_token(&mut lexer, TokenKind::Newline, None);
5820 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5821 assert_next_token(&mut lexer, TokenKind::Word, Some("hi"));
5822
5823 let inline = lexer.next_lexed_token_with_comments().unwrap();
5824 assert_eq!(inline.kind, TokenKind::Comment);
5825 assert_eq!(token_text(&inline, lexer.input).as_deref(), Some(" tail"));
5826 assert_eq!(inline.span.start.line, 2);
5827 assert_eq!(inline.span.start.column, 9);
5828 }
5829
5830 #[test]
5831 fn test_comment_token_preserves_hash_boundaries() {
5832 let mut lexer = Lexer::new("echo foo#bar ${x#y} '# nope' \"# nope\" # yep");
5833
5834 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
5835 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("foo#bar"));
5836 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("${x#y}"));
5837 assert_next_token_with_comments(&mut lexer, TokenKind::LiteralWord, Some("# nope"));
5838 assert_next_token_with_comments(&mut lexer, TokenKind::QuotedWord, Some("# nope"));
5839 assert_next_token_with_comments(&mut lexer, TokenKind::Comment, Some(" yep"));
5840 assert!(lexer.next_lexed_token_with_comments().is_none());
5841 }
5842
5843 #[test]
5844 fn test_zsh_inline_glob_control_after_left_paren_is_not_comment() {
5845 let mut lexer = Lexer::new("if [[ \"$buf\" == (#b)(*)(${~pat})* ]]; then\n");
5846
5847 let mut saw_comment = false;
5848 while let Some(token) = lexer.next_lexed_token_with_comments() {
5849 if token.kind == TokenKind::Comment {
5850 saw_comment = true;
5851 break;
5852 }
5853 }
5854
5855 assert!(
5856 !saw_comment,
5857 "zsh inline glob controls inside [[ ]] should not lex as comments"
5858 );
5859 }
5860
5861 #[test]
5862 fn test_zsh_arithmetic_char_literal_inside_double_parens_is_not_comment() {
5863 let mut lexer = Lexer::new("(( #c < 256 / $1 * $1 )) && break\n");
5864
5865 let mut saw_comment = false;
5866 while let Some(token) = lexer.next_lexed_token_with_comments() {
5867 if token.kind == TokenKind::Comment {
5868 saw_comment = true;
5869 break;
5870 }
5871 }
5872
5873 assert!(
5874 !saw_comment,
5875 "zsh arithmetic char literals inside (( )) should not lex as comments"
5876 );
5877 }
5878
5879 #[test]
5880 fn test_double_quoted_parameter_replacement_with_embedded_quotes_stays_single_word() {
5881 let mut lexer = Lexer::new(
5882 "builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n",
5883 );
5884
5885 assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5886 assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5887 assert_next_token(
5888 &mut lexer,
5889 TokenKind::LiteralWord,
5890 Some("\\e]133;C;cmdline_url=%s\\a"),
5891 );
5892 assert_next_token(
5893 &mut lexer,
5894 TokenKind::QuotedWord,
5895 Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5896 );
5897 assert_next_token(&mut lexer, TokenKind::Newline, None);
5898 }
5899
5900 #[test]
5901 fn test_anonymous_function_body_with_nested_replacement_word_keeps_closing_brace_token() {
5902 let mut lexer = Lexer::new(
5903 "() {\n builtin printf '\\e]133;C;cmdline_url=%s\\a' \"${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}\"\n} \"$1\"\n",
5904 );
5905
5906 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
5907 assert_next_token(&mut lexer, TokenKind::RightParen, None);
5908 assert_next_token(&mut lexer, TokenKind::LeftBrace, None);
5909 assert_next_token(&mut lexer, TokenKind::Newline, None);
5910 assert_next_token(&mut lexer, TokenKind::Word, Some("builtin"));
5911 assert_next_token(&mut lexer, TokenKind::Word, Some("printf"));
5912 assert_next_token(
5913 &mut lexer,
5914 TokenKind::LiteralWord,
5915 Some("\\e]133;C;cmdline_url=%s\\a"),
5916 );
5917 assert_next_token(
5918 &mut lexer,
5919 TokenKind::QuotedWord,
5920 Some("${1//(#m)[^a-zA-Z0-9\"\\/:_.-!'()~\"]/%${(l:2::0:)$(([##16]#MATCH))}}"),
5921 );
5922 assert_next_token(&mut lexer, TokenKind::Newline, None);
5923 assert_next_token(&mut lexer, TokenKind::RightBrace, None);
5924 assert_next_token(&mut lexer, TokenKind::QuotedWord, Some("$1"));
5925 assert_next_token(&mut lexer, TokenKind::Newline, None);
5926 }
5927
5928 #[test]
5929 fn test_variable_words() {
5930 let mut lexer = Lexer::new("echo $HOME $USER");
5931
5932 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5933 assert_next_token(&mut lexer, TokenKind::Word, Some("$HOME"));
5934 assert_next_token(&mut lexer, TokenKind::Word, Some("$USER"));
5935 assert!(lexer.next_lexed_token().is_none());
5936 }
5937
5938 #[test]
5939 fn test_pipeline_tokens() {
5940 let mut lexer = Lexer::new("echo hello | cat");
5941
5942 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
5943 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
5944 assert_next_token(&mut lexer, TokenKind::Pipe, None);
5945 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5946 assert!(lexer.next_lexed_token().is_none());
5947 }
5948
5949 #[test]
5950 fn test_read_heredoc() {
5951 let mut lexer = Lexer::new("\nhello\nworld\nEOF");
5953 let content = lexer.read_heredoc("EOF", false);
5954 assert_eq!(content.content, "hello\nworld\n");
5955 }
5956
5957 #[test]
5958 fn test_read_heredoc_single_line() {
5959 let mut lexer = Lexer::new("\ntest\nEOF");
5960 let content = lexer.read_heredoc("EOF", false);
5961 assert_eq!(content.content, "test\n");
5962 }
5963
5964 #[test]
5965 fn test_read_heredoc_full_scenario() {
5966 let mut lexer = Lexer::new("cat <<EOF\nhello\nworld\nEOF");
5968
5969 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5971 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5972 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5973
5974 let content = lexer.read_heredoc("EOF", false);
5976 assert_eq!(content.content, "hello\nworld\n");
5977 }
5978
5979 #[test]
5980 fn test_read_heredoc_with_redirect() {
5981 let mut lexer = Lexer::new("cat <<EOF > file.txt\nhello\nEOF");
5983 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5984 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
5985 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
5986 let content = lexer.read_heredoc("EOF", false);
5987 assert_eq!(content.content, "hello\n");
5988 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
5990 assert_next_token(&mut lexer, TokenKind::Word, Some("file.txt"));
5991 }
5992
5993 #[test]
5994 fn test_read_heredoc_reinjects_line_continued_pipeline_tail() {
5995 let source = "cat <<EOF | grep hello \\\n | sort \\\n > out.txt\nhello\nEOF\n";
5996 let mut lexer = Lexer::new(source);
5997
5998 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
5999 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6000 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6001
6002 let heredoc = lexer.read_heredoc("EOF", false);
6003 assert_eq!(heredoc.content, "hello\n");
6004
6005 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6006 assert_next_token(&mut lexer, TokenKind::Word, Some("grep"));
6007 assert_next_token(&mut lexer, TokenKind::Word, Some("hello"));
6008 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6009 assert_next_token(&mut lexer, TokenKind::Word, Some("sort"));
6010 assert_next_token(&mut lexer, TokenKind::RedirectOut, None);
6011 assert_next_token(&mut lexer, TokenKind::Word, Some("out.txt"));
6012 }
6013
6014 #[test]
6015 fn test_read_heredoc_does_not_continue_body_when_backslash_is_immediately_after_delimiter() {
6016 let source = "cat <<EOF \\\n1\n2\n3\nEOF\n| tac\n";
6017 let mut lexer = Lexer::new(source);
6018
6019 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6020 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6021 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6022
6023 let heredoc = lexer.read_heredoc("EOF", false);
6024 assert_eq!(heredoc.content, "1\n2\n3\n");
6025 }
6026
6027 #[test]
6028 fn test_read_heredoc_escaped_backslash_before_newline_does_not_continue_tail() {
6029 let source = "cat <<EOF foo\\\\\nbody\nEOF\n";
6030 let mut lexer = Lexer::new(source);
6031
6032 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6033 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6034 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6035
6036 let heredoc = lexer.read_heredoc("EOF", false);
6037 assert_eq!(heredoc.content, "body\n");
6038 }
6039
6040 #[test]
6041 fn test_read_heredoc_comment_backslash_does_not_continue_tail() {
6042 let source = "cat <<EOF # note \\\nbody\nEOF\n";
6043 let mut lexer = Lexer::new(source);
6044
6045 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6046 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6047 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6048
6049 let heredoc = lexer.read_heredoc("EOF", false);
6050 assert_eq!(heredoc.content, "body\n");
6051 }
6052
6053 #[test]
6054 fn test_read_heredoc_right_paren_comment_backslash_does_not_continue_tail() {
6055 let source = "( cat <<EOF )# note \\\nbody\nEOF\n";
6056 let mut lexer = Lexer::new(source);
6057
6058 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6059 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6060 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6061 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6062
6063 let heredoc = lexer.read_heredoc("EOF", false);
6064 assert_eq!(heredoc.content, "body\n");
6065
6066 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6067 }
6068
6069 #[test]
6070 fn test_read_heredoc_blank_prefix_continues_into_operator_led_tail() {
6071 let source = "cat <<EOF \\\n| tac\n1\nEOF\n";
6072 let mut lexer = Lexer::new(source);
6073
6074 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6075 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6076 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6077
6078 let heredoc = lexer.read_heredoc("EOF", false);
6079 assert_eq!(heredoc.content, "1\n");
6080
6081 assert_next_token(&mut lexer, TokenKind::Pipe, None);
6082 assert_next_token(&mut lexer, TokenKind::Word, Some("tac"));
6083 }
6084
6085 #[test]
6086 fn test_read_heredoc_with_redirect_preserves_following_spans() {
6087 let source = "cat <<EOF > file.txt\nhello\nEOF\n# done\n";
6088 let mut lexer = Lexer::new(source);
6089
6090 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6091 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6092 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6093
6094 let heredoc = lexer.read_heredoc("EOF", false);
6095 assert_eq!(heredoc.content, "hello\n");
6096
6097 let redirect = lexer.next_lexed_token_with_comments().unwrap();
6098 assert_eq!(redirect.kind, TokenKind::RedirectOut);
6099 assert_eq!(redirect.span.slice(source), ">");
6100
6101 let target = lexer.next_lexed_token_with_comments().unwrap();
6102 assert_eq!(target.kind, TokenKind::Word);
6103 assert_eq!(
6104 token_text(&target, lexer.input).as_deref(),
6105 Some("file.txt")
6106 );
6107 assert_eq!(target.span.slice(source), "file.txt");
6108
6109 let newline = lexer.next_lexed_token_with_comments().unwrap();
6110 assert_eq!(newline.kind, TokenKind::Newline);
6111 assert_eq!(newline.span.slice(source), "\n");
6112
6113 let comment = lexer.next_lexed_token_with_comments().unwrap();
6114 assert_eq!(comment.kind, TokenKind::Comment);
6115 assert_eq!(token_text(&comment, lexer.input).as_deref(), Some(" done"));
6116 assert_eq!(comment.span.slice(source), "# done");
6117 }
6118
6119 #[test]
6120 fn test_comment_with_unicode() {
6121 let source = "# café résumé\necho ok";
6123 let mut lexer = Lexer::new(source);
6124
6125 let comment = lexer.next_lexed_token_with_comments().unwrap();
6126 assert_eq!(comment.kind, TokenKind::Comment);
6127 assert_eq!(
6128 token_text(&comment, lexer.input).as_deref(),
6129 Some(" café résumé")
6130 );
6131 let start = comment.span.start.offset;
6133 let end = comment.span.end.offset;
6134 assert_eq!(start, 0);
6135 assert_eq!(&source[start..end], "# café résumé");
6136 assert!(source.is_char_boundary(start));
6137 assert!(source.is_char_boundary(end));
6138
6139 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6140 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("echo"));
6141 }
6142
6143 #[test]
6144 fn test_comment_with_cjk_characters() {
6145 let source = "# 你好世界\necho ok";
6147 let mut lexer = Lexer::new(source);
6148
6149 let comment = lexer.next_lexed_token_with_comments().unwrap();
6150 assert_eq!(comment.kind, TokenKind::Comment);
6151 assert_eq!(
6152 token_text(&comment, lexer.input).as_deref(),
6153 Some(" 你好世界")
6154 );
6155 let start = comment.span.start.offset;
6156 let end = comment.span.end.offset;
6157 assert_eq!(&source[start..end], "# 你好世界");
6158 assert!(source.is_char_boundary(start));
6159 assert!(source.is_char_boundary(end));
6160 }
6161
6162 #[test]
6163 fn test_heredoc_with_comments_inside() {
6164 let source = "cat <<EOF\n# not a comment\nreal line\nEOF\n# real comment\n";
6166 let mut lexer = Lexer::new(source);
6167
6168 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6169 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6170 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6171
6172 let heredoc = lexer.read_heredoc("EOF", false);
6173 assert_eq!(heredoc.content, "# not a comment\nreal line\n");
6174
6175 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6178 let comment = lexer.next_lexed_token_with_comments().unwrap();
6179 assert_eq!(comment.kind, TokenKind::Comment);
6180 assert_eq!(
6181 token_text(&comment, lexer.input).as_deref(),
6182 Some(" real comment")
6183 );
6184 }
6185
6186 #[test]
6187 fn test_heredoc_with_hash_in_variable() {
6188 let source = "cat <<EOF\nval=${x#prefix}\nEOF\n";
6190 let mut lexer = Lexer::new(source);
6191
6192 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6193 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6194 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("EOF"));
6195
6196 let heredoc = lexer.read_heredoc("EOF", false);
6197 assert_eq!(heredoc.content, "val=${x#prefix}\n");
6198 }
6199
6200 #[test]
6201 fn test_heredoc_span_does_not_leak() {
6202 let source = "cat <<EOF\nhello\nworld\nEOF\necho after";
6205 let mut lexer = Lexer::new(source);
6206
6207 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6208 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6209 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6210
6211 let heredoc = lexer.read_heredoc("EOF", false);
6212 let start = heredoc.content_span.start.offset;
6213 let end = heredoc.content_span.end.offset;
6214 assert!(
6215 end <= source.len(),
6216 "heredoc span end ({end}) exceeds source length ({})",
6217 source.len()
6218 );
6219 assert_eq!(&source[start..end], "hello\nworld\n");
6220
6221 assert_next_token(&mut lexer, TokenKind::Newline, None);
6223 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6224 assert_next_token(&mut lexer, TokenKind::Word, Some("after"));
6225 }
6226
6227 #[test]
6228 fn test_quoted_heredoc_preserves_following_backtick_word_spans() {
6229 let source = "\
6230cat <<\\_ACEOF
6231Use these variables to override the choices made by `configure' or to help
6232it to find libraries and programs with nonstandard names/locations.
6233_ACEOF
6234ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`
6235ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`
6236";
6237 let mut lexer = Lexer::new(source);
6238
6239 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("cat"));
6240 assert_next_token_with_comments(&mut lexer, TokenKind::HereDoc, None);
6241 let delimiter = lexer.next_lexed_token_with_comments().unwrap();
6242 assert_eq!(delimiter.kind, TokenKind::Word);
6243 assert_eq!(delimiter.span.slice(source), "\\_ACEOF");
6244
6245 let heredoc = lexer.read_heredoc("_ACEOF", false);
6246 assert_eq!(
6247 heredoc.content,
6248 "Use these variables to override the choices made by `configure' or to help\nit to find libraries and programs with nonstandard names/locations.\n"
6249 );
6250
6251 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6252
6253 let first = lexer.next_lexed_token_with_comments().unwrap();
6254 assert_eq!(first.kind, TokenKind::Word);
6255 assert_eq!(
6256 first.span.slice(source),
6257 "ac_dir_suffix=/`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`"
6258 );
6259 let first_segments = first
6260 .word()
6261 .unwrap()
6262 .segments()
6263 .map(|segment| {
6264 (
6265 segment.kind(),
6266 segment.as_str().to_string(),
6267 segment.span().map(|span| span.slice(source).to_string()),
6268 )
6269 })
6270 .collect::<Vec<_>>();
6271 assert_eq!(
6272 first_segments,
6273 vec![
6274 (
6275 LexedWordSegmentKind::Plain,
6276 "ac_dir_suffix=/".to_string(),
6277 Some("ac_dir_suffix=/".to_string()),
6278 ),
6279 (
6280 LexedWordSegmentKind::Plain,
6281 "`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string(),
6282 Some("`$as_echo \"$ac_dir\" | sed 's|^\\.[\\\\/]||'`".to_string()),
6283 ),
6284 ]
6285 );
6286
6287 assert_next_token_with_comments(&mut lexer, TokenKind::Newline, None);
6288
6289 let second = lexer.next_lexed_token_with_comments().unwrap();
6290 assert_eq!(second.kind, TokenKind::Word);
6291 assert_eq!(
6292 second.span.slice(source),
6293 "ac_top_builddir_sub=`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6294 );
6295 let second_segments = second
6296 .word()
6297 .unwrap()
6298 .segments()
6299 .map(|segment| {
6300 (
6301 segment.kind(),
6302 segment.as_str().to_string(),
6303 segment.span().map(|span| span.slice(source).to_string()),
6304 )
6305 })
6306 .collect::<Vec<_>>();
6307 assert_eq!(
6308 second_segments,
6309 vec![
6310 (
6311 LexedWordSegmentKind::Plain,
6312 "ac_top_builddir_sub=".to_string(),
6313 Some("ac_top_builddir_sub=".to_string()),
6314 ),
6315 (
6316 LexedWordSegmentKind::Plain,
6317 "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`".to_string(),
6318 Some(
6319 "`$as_echo \"$ac_dir_suffix\" | sed 's|/[^\\\\/]*|/..|g;s|/||'`"
6320 .to_string(),
6321 ),
6322 ),
6323 ]
6324 );
6325 }
6326
6327 #[test]
6328 fn test_heredoc_with_unicode_content() {
6329 let source = "cat <<EOF\n# 你好\ncafé\nEOF\n";
6331 let mut lexer = Lexer::new(source);
6332
6333 assert_next_token(&mut lexer, TokenKind::Word, Some("cat"));
6334 assert_next_token(&mut lexer, TokenKind::HereDoc, None);
6335 assert_next_token(&mut lexer, TokenKind::Word, Some("EOF"));
6336
6337 let heredoc = lexer.read_heredoc("EOF", false);
6338 assert_eq!(heredoc.content, "# 你好\ncafé\n");
6339 let start = heredoc.content_span.start.offset;
6340 let end = heredoc.content_span.end.offset;
6341 assert!(
6342 source.is_char_boundary(start),
6343 "heredoc span start ({start}) not on char boundary"
6344 );
6345 assert!(
6346 source.is_char_boundary(end),
6347 "heredoc span end ({end}) not on char boundary"
6348 );
6349 assert_eq!(&source[start..end], "# 你好\ncafé\n");
6350 }
6351
6352 #[test]
6353 fn test_assoc_compound_assignment() {
6354 let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#);
6357 assert_next_token(
6358 &mut lexer,
6359 TokenKind::Word,
6360 Some(r#"m=([foo]="bar" [baz]="qux")"#),
6361 );
6362 assert!(lexer.next_lexed_token().is_none());
6363 }
6364
6365 #[test]
6366 fn test_assoc_compound_assignment_after_escaped_literal_keeps_compound_word() {
6367 let source = r#"foo\_bar=([foo]="bar" [baz]="qux")"#;
6368 let mut lexer = Lexer::new(source);
6369
6370 let token = lexer.next_lexed_token().unwrap();
6371 assert_eq!(token.kind, TokenKind::Word);
6372 assert_eq!(token.span.slice(source), source);
6373 assert!(lexer.next_lexed_token().is_none());
6374 }
6375
6376 #[test]
6377 fn test_extglob_after_escaped_literal_keeps_suffix_group() {
6378 let source = r#"foo\_bar@(baz|qux)"#;
6379 let mut lexer = Lexer::new(source);
6380
6381 let token = lexer.next_lexed_token().unwrap();
6382 assert_eq!(token.kind, TokenKind::Word);
6383 assert_eq!(token.span.slice(source), source);
6384 assert!(lexer.next_lexed_token().is_none());
6385 }
6386
6387 #[test]
6388 fn test_indexed_array_not_collapsed() {
6389 let mut lexer = Lexer::new(r#"arr=("hello world")"#);
6392 assert_next_token(&mut lexer, TokenKind::Word, Some("arr="));
6393 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6394 }
6395
6396 #[test]
6397 fn test_array_element_with_quoted_prefix_zsh_glob_qualifier_stays_one_word() {
6398 let source = r#"plugins=( "$plugin_dir"/*(:t) )"#;
6399 let mut lexer = Lexer::new(source);
6400
6401 assert_next_token(&mut lexer, TokenKind::Word, Some("plugins="));
6402 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6403
6404 let token = lexer.next_lexed_token().unwrap();
6405 assert_eq!(token.kind, TokenKind::Word);
6406 assert_eq!(token.span.slice(source), r#""$plugin_dir"/*(:t)"#);
6407
6408 let word = token.word().unwrap();
6409 let segments: Vec<_> = word
6410 .segments()
6411 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6412 .collect();
6413 assert_eq!(
6414 segments,
6415 vec![
6416 (
6417 LexedWordSegmentKind::DoubleQuoted,
6418 "$plugin_dir".to_string()
6419 ),
6420 (LexedWordSegmentKind::Plain, "/*".to_string()),
6421 (LexedWordSegmentKind::Plain, "(:t)".to_string()),
6422 ]
6423 );
6424
6425 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6426 assert!(lexer.next_lexed_token().is_none());
6427 }
6428
6429 #[test]
6430 fn test_array_element_with_quoted_variable_zsh_qualifier_stays_one_word() {
6431 let source = r#"__GREP_ALIAS_CACHES=( "$__GREP_CACHE_FILE"(Nm-1) )"#;
6432 let mut lexer = Lexer::new(source);
6433
6434 assert_next_token(&mut lexer, TokenKind::Word, Some("__GREP_ALIAS_CACHES="));
6435 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6436
6437 let token = lexer.next_lexed_token().unwrap();
6438 assert_eq!(token.kind, TokenKind::Word);
6439 assert_eq!(token.span.slice(source), r#""$__GREP_CACHE_FILE"(Nm-1)"#);
6440
6441 let word = token.word().unwrap();
6442 let segments: Vec<_> = word
6443 .segments()
6444 .map(|segment| (segment.kind(), segment.as_str().to_string()))
6445 .collect();
6446 assert_eq!(
6447 segments,
6448 vec![
6449 (
6450 LexedWordSegmentKind::DoubleQuoted,
6451 "$__GREP_CACHE_FILE".to_string()
6452 ),
6453 (LexedWordSegmentKind::Plain, "(Nm-1)".to_string()),
6454 ]
6455 );
6456
6457 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6458 assert!(lexer.next_lexed_token().is_none());
6459 }
6460
6461 #[test]
6462 fn test_parameter_expansion_with_zsh_qualifier_stays_single_word() {
6463 let source = r#"$dir/${~pats}(N)"#;
6464 let mut lexer = Lexer::new(source);
6465
6466 let token = lexer.next_lexed_token().unwrap();
6467 assert_eq!(token.kind, TokenKind::Word);
6468 assert_eq!(token.span.slice(source), source);
6469 assert!(lexer.next_lexed_token().is_none());
6470 }
6471
6472 #[test]
6473 fn test_dollar_word_does_not_absorb_function_parens() {
6474 let mut lexer = Lexer::new(r#"foo$x()"#);
6475
6476 assert_next_token(&mut lexer, TokenKind::Word, Some("foo$x"));
6477 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6478 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6479 assert!(lexer.next_lexed_token().is_none());
6480 }
6481
6482 #[test]
6483 fn test_command_substitution_word_does_not_absorb_function_parens() {
6484 let mut lexer = Lexer::new(r#"foo-$(echo hi)()"#);
6485
6486 assert_next_token(&mut lexer, TokenKind::Word, Some("foo-$(echo hi)"));
6487 assert_next_token(&mut lexer, TokenKind::LeftParen, None);
6488 assert_next_token(&mut lexer, TokenKind::RightParen, None);
6489 assert!(lexer.next_lexed_token().is_none());
6490 }
6491
6492 #[test]
6495 fn test_digit_at_eof_no_panic() {
6496 let mut lexer = Lexer::new("2");
6498 let token = lexer.next_lexed_token();
6499 assert!(token.is_some());
6500 }
6501
6502 #[test]
6504 fn test_nested_brace_expansion_single_token() {
6505 let mut lexer = Lexer::new("${arr[${#arr[@]} - 1]}");
6507 assert_next_token(&mut lexer, TokenKind::Word, Some("${arr[${#arr[@]} - 1]}"));
6508 assert!(lexer.next_lexed_token().is_none());
6510 }
6511
6512 #[test]
6514 fn test_simple_brace_expansion_unchanged() {
6515 let mut lexer = Lexer::new("${foo}");
6516 assert_next_token(&mut lexer, TokenKind::Word, Some("${foo}"));
6517 assert!(lexer.next_lexed_token().is_none());
6518 }
6519
6520 #[test]
6521 fn test_nvm_fixture_lexes_without_stalling() {
6522 let input = include_str!("../../../shuck-benchmark/resources/files/nvm.sh");
6523 let mut lexer = Lexer::new(input);
6524 let mut tokens = 0usize;
6525
6526 while lexer.next_lexed_token().is_some() {
6527 tokens += 1;
6528 assert!(
6529 tokens < 100_000,
6530 "lexer should continue making progress on the nvm fixture"
6531 );
6532 }
6533
6534 assert!(tokens > 0, "nvm fixture should produce at least one token");
6535 }
6536
6537 #[test]
6538 fn test_case_arm_with_quoted_space_substitution_stays_line_local() {
6539 let input = concat!(
6540 "case \"${_input_type:-}\" in\n",
6541 " html) _hashtag_pattern=\"<a\\ href=\\\"${_hashtag_replacement_url//' '/%20}\\\">\\#\\\\2<\\/a>\" ;;\n",
6542 " org) _hashtag_pattern=\"[[${_hashtag_replacement_url//' '/%20}][\\#\\\\2]]\" ;;\n",
6543 "esac\n",
6544 );
6545
6546 assert_non_newline_tokens_stay_on_one_line(input);
6547
6548 let mut lexer = Lexer::new(input);
6549 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6550 .map(|token| (token.kind, token_text(&token, input)))
6551 .collect::<Vec<_>>();
6552 assert!(tokens.contains(&(TokenKind::DoubleSemicolon, None)));
6553 assert!(tokens.contains(&(TokenKind::Word, Some("esac".to_string()))));
6554 }
6555
6556 #[test]
6557 fn test_case_arm_with_zsh_semipipe_terminator_lexes_as_single_token() {
6558 let input = concat!(
6559 "case $2 in\n",
6560 " cygwin*) bin='cygwin32/bin' ;|\n",
6561 "esac\n",
6562 );
6563
6564 let mut lexer = Lexer::new(input);
6565 let tokens = std::iter::from_fn(|| lexer.next_lexed_token())
6566 .map(|token| (token.kind, token_text(&token, input)))
6567 .collect::<Vec<_>>();
6568
6569 assert!(tokens.contains(&(TokenKind::SemiPipe, None)));
6570 assert!(!tokens.contains(&(TokenKind::Semicolon, None)));
6571 assert!(!tokens.contains(&(TokenKind::Pipe, None)));
6572 }
6573
6574 #[test]
6575 fn test_inline_if_with_array_append_stays_line_local() {
6576 let input = concat!(
6577 "if [[ -n $arr ]]; then pyout+=(\"${output}\")\n",
6578 "elif [[ -n $var ]]; then pyout+=\"${output}${ln:+\\n}\"; fi\n",
6579 );
6580
6581 assert_non_newline_tokens_stay_on_one_line(input);
6582 }
6583
6584 #[test]
6585 fn test_zsh_midfile_unsetopt_interactive_comments_keeps_hash_as_word() {
6586 let source = "unsetopt interactive_comments\n#literal\n";
6587 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6588 let mut lexer = Lexer::with_profile(source, &profile);
6589
6590 assert_next_token(&mut lexer, TokenKind::Word, Some("unsetopt"));
6591 assert_next_token(&mut lexer, TokenKind::Word, Some("interactive_comments"));
6592 assert_next_token(&mut lexer, TokenKind::Newline, None);
6593 assert_next_token_with_comments(&mut lexer, TokenKind::Word, Some("#literal"));
6594 }
6595
6596 #[test]
6597 fn test_zsh_midfile_setopt_rc_quotes_merges_adjacent_single_quotes() {
6598 let source = "setopt rc_quotes\nprint 'a''b'\n";
6599 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6600 let mut lexer = Lexer::with_profile(source, &profile);
6601
6602 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6603 assert_next_token(&mut lexer, TokenKind::Word, Some("rc_quotes"));
6604 assert_next_token(&mut lexer, TokenKind::Newline, None);
6605 assert_next_token(&mut lexer, TokenKind::Word, Some("print"));
6606 assert_next_token(&mut lexer, TokenKind::LiteralWord, Some("a'b"));
6607 }
6608
6609 #[test]
6610 fn test_zsh_midfile_setopt_ignore_braces_lexes_braces_as_words() {
6611 let source = "setopt ignore_braces\n{ echo }\n";
6612 let profile = ShellProfile::native(crate::parser::ShellDialect::Zsh);
6613 let mut lexer = Lexer::with_profile(source, &profile);
6614
6615 assert_next_token(&mut lexer, TokenKind::Word, Some("setopt"));
6616 assert_next_token(&mut lexer, TokenKind::Word, Some("ignore_braces"));
6617 assert_next_token(&mut lexer, TokenKind::Newline, None);
6618 assert_next_token(&mut lexer, TokenKind::Word, Some("{"));
6619 assert_next_token(&mut lexer, TokenKind::Word, Some("echo"));
6620 assert_next_token(&mut lexer, TokenKind::Word, Some("}"));
6621 }
6622
6623 #[test]
6624 fn test_heredoc_in_arithmetic_fuzz_crash() {
6625 let data: &[u8] = &[
6629 35, 33, 111, 98, 105, 110, 41, 41, 10, 40, 40, 32, 36, 111, 98, 105, 110, 41, 41, 10,
6630 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4,
6631 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119,
6632 119, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0,
6633 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110, 119, 119, 49, 32, 119, 119, 109,
6634 119, 119, 110, 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39,
6635 122, 122, 122, 122, 122, 122, 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6636 122, 40, 122, 122, 122, 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
6637 122, 122, 122, 0, 53, 32, 43, 32, 49, 32, 41, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32,
6638 49, 32, 6, 0, 0, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33, 61, 26, 40, 40, 32, 110,
6639 119, 119, 49, 32, 119, 119, 109, 119, 119, 119, 119, 119, 119, 122, 39, 122, 122, 122,
6640 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 0, 0, 0, 0, 41, 60, 60, 69, 41, 4, 33,
6641 61, 26, 40, 40, 32, 110, 119, 119, 48, 32, 119, 119, 109, 119, 119, 110, 119, 119, 49,
6642 32, 119, 119, 109, 119, 119, 119, 0, 14, 119, 122, 39, 122, 122, 122, 122, 122, 122,
6643 122, 47, 33, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 40, 122, 122, 122, 122,
6644 39, 122, 122, 122, 122, 122, 122, 122, 88, 88, 88, 88, 122, 122, 40, 122, 122, 122,
6645 122, 39, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 0, 53,
6646 32, 43, 32, 49, 32, 53, 41, 10, 40, 40, 32, 36, 53, 32, 43, 32, 49, 32, 6, 0, 0, 0, 0,
6647 0, 0, 0, 41, 60, 60, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0,
6648 ];
6649 let input = std::str::from_utf8(data).unwrap();
6650 let script = format!("echo $(({input}))\n");
6651 let _ = crate::parser::Parser::new(&script).parse();
6653 }
6654}