1use logos::{Logos, Span};
17use std::fmt;
18use std::sync::atomic::{AtomicU64, Ordering};
19use std::time::{SystemTime, UNIX_EPOCH};
20
/// Process-wide counter that `unique_marker_id` increments on every call and
/// embeds in the id it returns.
static MARKER_COUNTER: AtomicU64 = AtomicU64::new(0);

/// Maximum parenthesis nesting accepted inside an arithmetic expression;
/// going deeper is reported as `LexerError::NestingTooDeep`.
const MAX_PAREN_DEPTH: usize = 256;
27
/// Describes one spot where preprocessing swapped a stretch of source text
/// for a marker, so spans measured in the preprocessed text can be mapped
/// back to the original source.
#[derive(Debug, Clone)]
struct SpanReplacement {
    /// Byte offset of the marker inside the preprocessed text.
    preprocessed_pos: usize,
    /// Byte length of the inserted marker.
    marker_len: usize,
    /// Byte length of the original text that the marker replaced.
    original_len: usize,
}
40
41fn correct_span(span: Span, replacements: &[SpanReplacement]) -> Span {
43 let mut start_adjustment: isize = 0;
44 let mut end_adjustment: isize = 0;
45
46 for r in replacements {
47 let delta = r.original_len as isize - r.marker_len as isize;
49
50 if span.start > r.preprocessed_pos + r.marker_len {
52 start_adjustment += delta;
53 } else if span.start > r.preprocessed_pos {
54 start_adjustment += delta;
57 }
58
59 if span.end > r.preprocessed_pos + r.marker_len {
61 end_adjustment += delta;
62 } else if span.end > r.preprocessed_pos {
63 end_adjustment += delta;
65 }
66 }
67
68 let new_start = (span.start as isize + start_adjustment).max(0) as usize;
69 let new_end = (span.end as isize + end_adjustment).max(new_start as isize) as usize;
70 new_start..new_end
71}
72
73fn unique_marker_id() -> String {
76 let timestamp = SystemTime::now()
77 .duration_since(UNIX_EPOCH)
78 .map(|d| d.as_nanos())
79 .unwrap_or(0);
80 let counter = MARKER_COUNTER.fetch_add(1, Ordering::Relaxed);
81 #[cfg(target_os = "wasi")]
82 let pid = 0u32;
83 #[cfg(not(target_os = "wasi"))]
84 let pid = std::process::id();
85 format!("{:x}_{:x}_{:x}", timestamp, counter, pid)
86}
87
/// A value paired with the source span it was produced from.
#[derive(Debug, Clone, PartialEq)]
pub struct Spanned<T> {
    /// The wrapped value (a token or an error in this file).
    pub token: T,
    /// Range of offsets the value covers.
    pub span: Span,
}
94
95impl<T> Spanned<T> {
96 pub fn new(token: T, span: Span) -> Self {
97 Self { token, span }
98 }
99}
100
/// Errors reported while lexing or preprocessing source text.
///
/// `UnexpectedCharacter` is the `Default`; this type is registered as the
/// logos error type on `Token`.
#[derive(Debug, Clone, PartialEq, Default)]
pub enum LexerError {
    /// Input that no token pattern accepted.
    #[default]
    UnexpectedCharacter,
    /// A string literal without a closing quote.
    UnterminatedString,
    /// A variable reference that was never closed.
    UnterminatedVarRef,
    /// An escape sequence that is not recognised.
    InvalidEscape,
    /// Numeric text that could not be parsed into a number.
    InvalidNumber,
    /// `true`/`false` written with non-lowercase letters; carries the text as written.
    AmbiguousBoolean(String),
    /// `yes`/`no` in any letter case; carries the text as written.
    AmbiguousBooleanLike(String),
    /// A float written without a leading digit.
    InvalidFloatNoLeading,
    /// A float written without a trailing digit.
    InvalidFloatNoTrailing,
    /// Parenthesis nesting exceeded `MAX_PAREN_DEPTH`.
    NestingTooDeep,
    /// A heredoc whose closing delimiter line was never found.
    UnterminatedHeredoc { delimiter: String },
    /// A backtick was encountered; backticks are rejected by the lexer.
    BackticksNotSupported,
}
126
127impl fmt::Display for LexerError {
128 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129 match self {
130 LexerError::UnexpectedCharacter => write!(f, "unexpected character"),
131 LexerError::UnterminatedString => write!(f, "unterminated string"),
132 LexerError::UnterminatedVarRef => write!(f, "unterminated variable reference"),
133 LexerError::InvalidEscape => write!(f, "invalid escape sequence"),
134 LexerError::InvalidNumber => write!(f, "invalid number"),
135 LexerError::AmbiguousBoolean(s) => {
136 write!(f, "ambiguous boolean, use lowercase '{}'", s.to_lowercase())
137 }
138 LexerError::AmbiguousBooleanLike(s) => {
139 let suggest = if s.eq_ignore_ascii_case("yes") { "true" } else { "false" };
140 write!(f, "ambiguous boolean-like '{}', use '{}' or '\"{}\"'", s, suggest, s)
141 }
142 LexerError::InvalidFloatNoLeading => write!(f, "float must have leading digit"),
143 LexerError::InvalidFloatNoTrailing => write!(f, "float must have trailing digit"),
144 LexerError::NestingTooDeep => write!(f, "nesting depth exceeded (max {})", MAX_PAREN_DEPTH),
145 LexerError::UnterminatedHeredoc { delimiter } => {
146 write!(f, "unterminated heredoc, expected closing delimiter `{}` on its own line", delimiter)
147 }
148 LexerError::BackticksNotSupported => {
149 write!(f, "backticks are not supported in kaish; use $(cmd) instead")
150 }
151 }
152 }
153}
154
/// Payload carried by `Token::HereDoc`.
///
/// NOTE(review): the fields mirror `HeredocReplacement` (minus its marker);
/// the code that converts one into the other is not in this part of the
/// file, so the field notes below are inferred from that struct — confirm.
#[derive(Debug, Clone, PartialEq)]
pub struct HereDocData {
    /// Heredoc body text.
    pub content: String,
    /// Presumably true when the delimiter was quoted — TODO confirm.
    pub literal: bool,
    /// Presumably true for the `<<-` form — TODO confirm.
    pub strip_tabs: bool,
    /// Presumably the source offset where the body begins — TODO confirm.
    pub body_start_offset: usize,
}
184
/// Token kinds produced for kaish source text.
///
/// Most variants are matched directly by logos via the `#[token]`/`#[regex]`
/// attributes below; runs of spaces and tabs between tokens are skipped.
/// `GlobWord`, `Arithmetic` and `HereDoc` carry no pattern here, so logos
/// never emits them on its own — presumably they are synthesized by other
/// passes (TODO confirm against the code that builds them).
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(error = LexerError)]
#[logos(skip r"[ \t]+")]
pub enum Token {
    // ----- Keywords -----
    /// `set`
    #[token("set")]
    Set,

    /// `local`
    #[token("local")]
    Local,

    /// `if`
    #[token("if")]
    If,

    /// `then`
    #[token("then")]
    Then,

    /// `else`
    #[token("else")]
    Else,

    /// `elif`
    #[token("elif")]
    Elif,

    /// `fi`
    #[token("fi")]
    Fi,

    /// `for`
    #[token("for")]
    For,

    /// `while`
    #[token("while")]
    While,

    /// `in`
    #[token("in")]
    In,

    /// `do`
    #[token("do")]
    Do,

    /// `done`
    #[token("done")]
    Done,

    /// `case`
    #[token("case")]
    Case,

    /// `esac`
    #[token("esac")]
    Esac,

    /// `function`
    #[token("function")]
    Function,

    /// `break`
    #[token("break")]
    Break,

    /// `continue`
    #[token("continue")]
    Continue,

    /// `return`
    #[token("return")]
    Return,

    /// `exit`
    #[token("exit")]
    Exit,

    /// `true` (exact lowercase spelling only)
    #[token("true")]
    True,

    /// `false` (exact lowercase spelling only)
    #[token("false")]
    False,

    // ----- Type names -----
    /// `string`
    #[token("string")]
    TypeString,

    /// `int`
    #[token("int")]
    TypeInt,

    /// `float`
    #[token("float")]
    TypeFloat,

    /// `bool`
    #[token("bool")]
    TypeBool,

    // ----- Multi-character operators -----
    /// `&&`
    #[token("&&")]
    And,

    /// `||`
    #[token("||")]
    Or,

    /// `==`
    #[token("==")]
    EqEq,

    /// `!=`
    #[token("!=")]
    NotEq,

    /// `=~`
    #[token("=~")]
    Match,

    /// `!~`
    #[token("!~")]
    NotMatch,

    /// `>=`
    #[token(">=")]
    GtEq,

    /// `<=`
    #[token("<=")]
    LtEq,

    /// `>>`
    #[token(">>")]
    GtGt,

    /// `2>&1`
    #[token("2>&1")]
    StderrToStdout,

    /// `1>&2`
    #[token("1>&2")]
    StdoutToStderr,

    /// `>&2`
    #[token(">&2")]
    StdoutToStderr2,

    /// `2>`
    #[token("2>")]
    Stderr,

    /// `&>`
    #[token("&>")]
    Both,

    /// `<<<`
    #[token("<<<")]
    HereString,

    /// `<<`
    #[token("<<")]
    HereDocStart,

    /// `;;`
    #[token(";;")]
    DoubleSemi,

    // ----- Single-character operators and punctuation -----
    /// `=`
    #[token("=")]
    Eq,

    /// `|`
    #[token("|")]
    Pipe,

    /// `&`
    #[token("&")]
    Amp,

    /// `>`
    #[token(">")]
    Gt,

    /// `<`
    #[token("<")]
    Lt,

    /// `;`
    #[token(";")]
    Semi,

    /// `:`
    #[token(":")]
    Colon,

    /// `,`
    #[token(",")]
    Comma,

    /// `..`
    #[token("..")]
    DotDot,

    /// `.`
    #[token(".")]
    Dot,

    // ----- Paths -----
    /// `~` followed by path characters; holds the full matched text, tilde included.
    #[regex(r"~[a-zA-Z0-9_./+-]+", lex_tilde_path, priority = 3)]
    TildePath(String),

    /// A lone `~`.
    #[token("~")]
    Tilde,

    /// A path starting with `../`, or a name followed by `/` and optional
    /// further path characters; holds the full matched text.
    #[regex(r"\.\./[a-zA-Z0-9_./-]+", lex_relative_path, priority = 3)]
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_.-]*/[a-zA-Z0-9_./-]*", lex_relative_path, priority = 3)]
    RelativePath(String),

    /// A path starting with `./`; holds the full matched text.
    #[regex(r"\./[a-zA-Z0-9_./-]+", lex_dot_slash_path, priority = 3)]
    DotSlashPath(String),

    /// A word starting with `.` followed by a letter or underscore; holds
    /// the full matched text, leading dot included.
    #[regex(r"\.[a-zA-Z_][a-zA-Z0-9_.-]*", lex_dotted_ident, priority = 3)]
    DottedIdent(String),

    // ----- Brackets and glob characters -----
    /// `{`
    #[token("{")]
    LBrace,

    /// `}`
    #[token("}")]
    RBrace,

    /// `[`
    #[token("[")]
    LBracket,

    /// `]`
    #[token("]")]
    RBracket,

    /// `(`
    #[token("(")]
    LParen,

    /// `)`
    #[token(")")]
    RParen,

    /// `*`
    #[token("*")]
    Star,

    /// `!`
    #[token("!")]
    Bang,

    /// `?`
    #[token("?")]
    Question,

    /// Glob word text. No pattern attached; not produced by logos directly.
    GlobWord(String),

    /// Arithmetic expression text. No pattern attached; not produced by
    /// logos directly.
    Arithmetic(String),

    /// `$(`
    #[token("$(")]
    CmdSubstStart,

    // ----- Flags and dash/plus words -----
    /// `--name`; holds the text after the leading `--`.
    #[regex(r"--[a-zA-Z][a-zA-Z0-9-]*", lex_long_flag, priority = 3)]
    LongFlag(String),

    /// `-name`; holds the text after the leading `-`.
    #[regex(r"-[a-zA-Z][a-zA-Z0-9-]*", lex_short_flag, priority = 3)]
    ShortFlag(String),

    /// `+name`; holds the text after the leading `+`.
    #[regex(r"\+[a-zA-Z][a-zA-Z0-9]*", lex_plus_flag, priority = 3)]
    PlusFlag(String),

    /// A bare `--`.
    #[token("--")]
    DoubleDash,

    /// `+` followed by a non-letter, non-whitespace character and the rest
    /// of the word; holds the full matched text.
    #[regex(r"\+[^a-zA-Z\s][^\s]*", lex_plus_bare, priority = 2)]
    PlusBare(String),

    /// `-` followed by a character that is not alphanumeric, whitespace or
    /// `-`, and the rest of the word; holds the full matched text.
    #[regex(r"-[^a-zA-Z0-9\s\-][^\s]*", lex_minus_bare, priority = 1)]
    MinusBare(String),

    /// `%` followed by digits; holds the full matched text.
    #[regex(r"%[0-9]+", lex_job_spec)]
    JobSpec(String),

    /// A lone `-`.
    #[token("-")]
    MinusAlone,

    // ----- Strings and variables -----
    /// Double-quoted string; the payload is whatever `parse_string_literal`
    /// returns for the matched text.
    #[regex(r#""([^"\\]|\\.)*""#, lex_string)]
    String(String),

    /// Single-quoted string; holds the text between the quotes.
    #[regex(r"'[^']*'", lex_single_string)]
    SingleString(String),

    /// `${...}`; holds the full matched text, including `${` and `}`.
    #[regex(r"\$\{[^}]+\}", lex_varref)]
    VarRef(String),

    /// `$name`; holds the name without the `$`.
    #[regex(r"\$[a-zA-Z_][a-zA-Z0-9_]*", lex_simple_varref)]
    SimpleVarRef(String),

    /// `$0` through `$9`; holds the digit as a number.
    #[regex(r"\$[0-9]", lex_positional)]
    Positional(usize),

    /// `$@`
    #[token("$@")]
    AllArgs,

    /// `$#`
    #[token("$#")]
    ArgCount,

    /// `$?`
    #[token("$?")]
    LastExitCode,

    /// `$$`
    #[token("$$")]
    CurrentPid,

    /// `${#name}`; holds just the name.
    #[regex(r"\$\{#[a-zA-Z_][a-zA-Z0-9_]*\}", lex_var_length)]
    VarLength(String),

    /// Heredoc payload. No pattern attached; not produced by logos directly.
    HereDoc(HereDocData),

    // ----- Numbers -----
    /// Optionally negative integer literal, parsed as `i64`.
    #[regex(r"-?[0-9]+", lex_int, priority = 2)]
    Int(i64),

    /// Optionally negative decimal literal with digits on both sides of the
    /// dot, parsed as `f64`.
    #[regex(r"-?[0-9]+\.[0-9]+", lex_float)]
    Float(f64),

    /// Digits immediately followed by identifier characters; holds the full
    /// matched text.
    #[regex(r"[0-9]+[a-zA-Z_][a-zA-Z0-9_.-]*", lex_number_ident, priority = 3)]
    NumberIdent(String),

    /// `.` followed by digits. The callback always returns
    /// `LexerError::InvalidFloatNoLeading`, so this surfaces as an error.
    #[regex(r"\.[0-9]+", lex_invalid_float_no_leading, priority = 3)]
    InvalidFloatNoLeading,

    /// Digits followed by `.`. The callback always returns
    /// `LexerError::InvalidFloatNoTrailing`, so this surfaces as an error.
    #[regex(r"[0-9]+\.", lex_invalid_float_no_trailing, priority = 2)]
    InvalidFloatNoTrailing,

    // ----- Words, comments, line structure -----
    /// A path starting with `/`; holds the full matched text.
    #[regex(r"/[a-zA-Z0-9_./+-]*", lex_path)]
    Path(String),

    /// Identifier. `lex_ident` rejects non-lowercase spellings of
    /// `true`/`false` and any spelling of `yes`/`no`.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_.-]*", lex_ident)]
    Ident(String),

    /// `#` up to, but not including, the end of the line.
    #[regex(r"#[^\n\r]*", allow_greedy = true)]
    Comment,

    /// `\n` or `\r\n`.
    #[regex(r"\n|\r\n")]
    Newline,

    /// Backslash, optional spaces/tabs, then a line ending.
    #[regex(r"\\[ \t]*(\n|\r\n)")]
    LineContinuation,

    /// A backtick. The callback always returns
    /// `LexerError::BackticksNotSupported`, so this surfaces as an error.
    #[token("`", reject_backtick)]
    BacktickRejected,
}
597
/// Coarse classification of a token, as returned by `Token::category`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenCategory {
    /// Keywords, boolean literals and type names.
    Keyword,
    /// Operators and redirections.
    Operator,
    /// String literals and heredocs.
    String,
    /// Integer, float and arithmetic tokens.
    Number,
    /// Variable references and special parameters.
    Variable,
    /// Comments.
    Comment,
    /// Separators, brackets, newlines and similar structural tokens.
    Punctuation,
    /// Identifiers and other bare words.
    Command,
    /// Paths, tildes and glob words.
    Path,
    /// Flags and `--`.
    Flag,
    /// Tokens that represent rejected input.
    Error,
}
627
628impl Token {
629 pub fn category(&self) -> TokenCategory {
631 match self {
632 Token::If
634 | Token::Then
635 | Token::Else
636 | Token::Elif
637 | Token::Fi
638 | Token::For
639 | Token::In
640 | Token::Do
641 | Token::Done
642 | Token::While
643 | Token::Case
644 | Token::Esac
645 | Token::Function
646 | Token::Return
647 | Token::Break
648 | Token::Continue
649 | Token::Exit
650 | Token::Set
651 | Token::Local
652 | Token::True
653 | Token::False
654 | Token::TypeString
655 | Token::TypeInt
656 | Token::TypeFloat
657 | Token::TypeBool => TokenCategory::Keyword,
658
659 Token::Pipe
661 | Token::And
662 | Token::Or
663 | Token::Amp
664 | Token::Eq
665 | Token::EqEq
666 | Token::NotEq
667 | Token::Match
668 | Token::NotMatch
669 | Token::Lt
670 | Token::Gt
671 | Token::LtEq
672 | Token::GtEq
673 | Token::GtGt
674 | Token::Stderr
675 | Token::Both
676 | Token::HereDocStart
677 | Token::HereString
678 | Token::StderrToStdout
679 | Token::StdoutToStderr
680 | Token::StdoutToStderr2 => TokenCategory::Operator,
681
682 Token::String(_) | Token::SingleString(_) | Token::HereDoc(_) => TokenCategory::String,
684
685 Token::Int(_) | Token::Float(_) | Token::Arithmetic(_) => TokenCategory::Number,
687
688 Token::VarRef(_)
690 | Token::SimpleVarRef(_)
691 | Token::Positional(_)
692 | Token::AllArgs
693 | Token::ArgCount
694 | Token::VarLength(_)
695 | Token::LastExitCode
696 | Token::CurrentPid => TokenCategory::Variable,
697
698 Token::LongFlag(_)
700 | Token::ShortFlag(_)
701 | Token::PlusFlag(_)
702 | Token::DoubleDash => TokenCategory::Flag,
703
704 Token::Semi
706 | Token::DoubleSemi
707 | Token::Colon
708 | Token::Comma
709 | Token::Dot
710 | Token::LParen
711 | Token::RParen
712 | Token::LBrace
713 | Token::RBrace
714 | Token::LBracket
715 | Token::RBracket
716 | Token::Bang
717 | Token::Question
718 | Token::Star
719 | Token::Newline
720 | Token::LineContinuation
721 | Token::CmdSubstStart => TokenCategory::Punctuation,
722
723 Token::GlobWord(_) => TokenCategory::Path,
725
726 Token::Comment => TokenCategory::Comment,
728
729 Token::Path(_)
731 | Token::TildePath(_)
732 | Token::RelativePath(_)
733 | Token::Tilde
734 | Token::DotDot
735 | Token::DotSlashPath(_) => TokenCategory::Path,
736
737 Token::Ident(_)
739 | Token::PlusBare(_)
740 | Token::MinusBare(_)
741 | Token::MinusAlone
742 | Token::NumberIdent(_)
743 | Token::DottedIdent(_)
744 | Token::JobSpec(_) => TokenCategory::Command,
745
746 Token::InvalidFloatNoLeading
748 | Token::InvalidFloatNoTrailing
749 | Token::BacktickRejected => TokenCategory::Error,
750 }
751 }
752}
753
754fn lex_string(lex: &mut logos::Lexer<Token>) -> Result<String, LexerError> {
756 parse_string_literal(lex.slice())
757}
758
759fn lex_single_string(lex: &mut logos::Lexer<Token>) -> String {
761 let s = lex.slice();
762 s[1..s.len() - 1].to_string()
764}
765
766fn lex_varref(lex: &mut logos::Lexer<Token>) -> String {
768 lex.slice().to_string()
770}
771
772fn lex_simple_varref(lex: &mut logos::Lexer<Token>) -> String {
774 lex.slice()[1..].to_string()
776}
777
778fn lex_positional(lex: &mut logos::Lexer<Token>) -> usize {
780 lex.slice()[1..].parse().unwrap_or(0)
782}
783
784fn lex_var_length(lex: &mut logos::Lexer<Token>) -> String {
786 let s = lex.slice();
788 s[3..s.len() - 1].to_string()
789}
790
791fn lex_int(lex: &mut logos::Lexer<Token>) -> Result<i64, LexerError> {
793 lex.slice().parse().map_err(|_| LexerError::InvalidNumber)
794}
795
796fn lex_float(lex: &mut logos::Lexer<Token>) -> Result<f64, LexerError> {
798 lex.slice().parse().map_err(|_| LexerError::InvalidNumber)
799}
800
801fn lex_number_ident(lex: &mut logos::Lexer<Token>) -> String {
805 lex.slice().to_string()
806}
807
808fn lex_dotted_ident(lex: &mut logos::Lexer<Token>) -> String {
810 lex.slice().to_string()
811}
812
813fn lex_invalid_float_no_leading(_lex: &mut logos::Lexer<Token>) -> Result<(), LexerError> {
816 Err(LexerError::InvalidFloatNoLeading)
817}
818
819fn reject_backtick(_lex: &mut logos::Lexer<Token>) -> Result<(), LexerError> {
823 Err(LexerError::BackticksNotSupported)
824}
825
826fn lex_invalid_float_no_trailing(_lex: &mut logos::Lexer<Token>) -> Result<(), LexerError> {
829 Err(LexerError::InvalidFloatNoTrailing)
830}
831
832fn lex_ident(lex: &mut logos::Lexer<Token>) -> Result<String, LexerError> {
834 let s = lex.slice();
835
836 match s.to_lowercase().as_str() {
839 "true" | "false" if s != "true" && s != "false" => {
840 return Err(LexerError::AmbiguousBoolean(s.to_string()));
841 }
842 _ => {}
843 }
844
845 if s.eq_ignore_ascii_case("yes") || s.eq_ignore_ascii_case("no") {
847 return Err(LexerError::AmbiguousBooleanLike(s.to_string()));
848 }
849
850 Ok(s.to_string())
851}
852
853fn lex_long_flag(lex: &mut logos::Lexer<Token>) -> String {
855 lex.slice()[2..].to_string()
857}
858
859fn lex_short_flag(lex: &mut logos::Lexer<Token>) -> String {
861 lex.slice()[1..].to_string()
863}
864
865fn lex_plus_flag(lex: &mut logos::Lexer<Token>) -> String {
867 lex.slice()[1..].to_string()
869}
870
871fn lex_plus_bare(lex: &mut logos::Lexer<Token>) -> String {
873 lex.slice().to_string()
874}
875
876fn lex_minus_bare(lex: &mut logos::Lexer<Token>) -> String {
878 lex.slice().to_string()
879}
880
881fn lex_job_spec(lex: &mut logos::Lexer<Token>) -> String {
883 lex.slice().to_string()
884}
885
886fn lex_path(lex: &mut logos::Lexer<Token>) -> String {
888 lex.slice().to_string()
889}
890
891fn lex_tilde_path(lex: &mut logos::Lexer<Token>) -> String {
893 lex.slice().to_string()
894}
895
896fn lex_relative_path(lex: &mut logos::Lexer<Token>) -> String {
898 lex.slice().to_string()
899}
900
901fn lex_dot_slash_path(lex: &mut logos::Lexer<Token>) -> String {
903 lex.slice().to_string()
904}
905
906impl fmt::Display for Token {
907 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
908 match self {
909 Token::Set => write!(f, "set"),
910 Token::Local => write!(f, "local"),
911 Token::If => write!(f, "if"),
912 Token::Then => write!(f, "then"),
913 Token::Else => write!(f, "else"),
914 Token::Elif => write!(f, "elif"),
915 Token::Fi => write!(f, "fi"),
916 Token::For => write!(f, "for"),
917 Token::While => write!(f, "while"),
918 Token::In => write!(f, "in"),
919 Token::Do => write!(f, "do"),
920 Token::Done => write!(f, "done"),
921 Token::Case => write!(f, "case"),
922 Token::Esac => write!(f, "esac"),
923 Token::Function => write!(f, "function"),
924 Token::Break => write!(f, "break"),
925 Token::Continue => write!(f, "continue"),
926 Token::Return => write!(f, "return"),
927 Token::Exit => write!(f, "exit"),
928 Token::True => write!(f, "true"),
929 Token::False => write!(f, "false"),
930 Token::TypeString => write!(f, "string"),
931 Token::TypeInt => write!(f, "int"),
932 Token::TypeFloat => write!(f, "float"),
933 Token::TypeBool => write!(f, "bool"),
934 Token::And => write!(f, "&&"),
935 Token::Or => write!(f, "||"),
936 Token::EqEq => write!(f, "=="),
937 Token::NotEq => write!(f, "!="),
938 Token::Match => write!(f, "=~"),
939 Token::NotMatch => write!(f, "!~"),
940 Token::GtEq => write!(f, ">="),
941 Token::LtEq => write!(f, "<="),
942 Token::GtGt => write!(f, ">>"),
943 Token::StderrToStdout => write!(f, "2>&1"),
944 Token::StdoutToStderr => write!(f, "1>&2"),
945 Token::StdoutToStderr2 => write!(f, ">&2"),
946 Token::Stderr => write!(f, "2>"),
947 Token::Both => write!(f, "&>"),
948 Token::HereDocStart => write!(f, "<<"),
949 Token::HereString => write!(f, "<<<"),
950 Token::DoubleSemi => write!(f, ";;"),
951 Token::Eq => write!(f, "="),
952 Token::Pipe => write!(f, "|"),
953 Token::Amp => write!(f, "&"),
954 Token::Gt => write!(f, ">"),
955 Token::Lt => write!(f, "<"),
956 Token::Semi => write!(f, ";"),
957 Token::Colon => write!(f, ":"),
958 Token::Comma => write!(f, ","),
959 Token::Dot => write!(f, "."),
960 Token::DotDot => write!(f, ".."),
961 Token::Tilde => write!(f, "~"),
962 Token::TildePath(s) => write!(f, "{}", s),
963 Token::RelativePath(s) => write!(f, "{}", s),
964 Token::DotSlashPath(s) => write!(f, "{}", s),
965 Token::LBrace => write!(f, "{{"),
966 Token::RBrace => write!(f, "}}"),
967 Token::LBracket => write!(f, "["),
968 Token::RBracket => write!(f, "]"),
969 Token::LParen => write!(f, "("),
970 Token::RParen => write!(f, ")"),
971 Token::Star => write!(f, "*"),
972 Token::Bang => write!(f, "!"),
973 Token::Question => write!(f, "?"),
974 Token::GlobWord(s) => write!(f, "GLOB({})", s),
975 Token::Arithmetic(s) => write!(f, "ARITHMETIC({})", s),
976 Token::CmdSubstStart => write!(f, "$("),
977 Token::LongFlag(s) => write!(f, "--{}", s),
978 Token::ShortFlag(s) => write!(f, "-{}", s),
979 Token::PlusFlag(s) => write!(f, "+{}", s),
980 Token::DoubleDash => write!(f, "--"),
981 Token::PlusBare(s) => write!(f, "{}", s),
982 Token::MinusBare(s) => write!(f, "{}", s),
983 Token::JobSpec(s) => write!(f, "{}", s),
984 Token::MinusAlone => write!(f, "-"),
985 Token::String(s) => write!(f, "STRING({:?})", s),
986 Token::SingleString(s) => write!(f, "SINGLESTRING({:?})", s),
987 Token::HereDoc(d) => write!(f, "HEREDOC({:?}, literal={})", d.content, d.literal),
988 Token::VarRef(v) => write!(f, "VARREF({})", v),
989 Token::SimpleVarRef(v) => write!(f, "SIMPLEVARREF({})", v),
990 Token::Positional(n) => write!(f, "${}", n),
991 Token::AllArgs => write!(f, "$@"),
992 Token::ArgCount => write!(f, "$#"),
993 Token::LastExitCode => write!(f, "$?"),
994 Token::CurrentPid => write!(f, "$$"),
995 Token::VarLength(v) => write!(f, "${{#{}}}", v),
996 Token::Int(n) => write!(f, "INT({})", n),
997 Token::Float(n) => write!(f, "FLOAT({})", n),
998 Token::Path(s) => write!(f, "PATH({})", s),
999 Token::Ident(s) => write!(f, "IDENT({})", s),
1000 Token::NumberIdent(s) => write!(f, "NUMIDENT({})", s),
1001 Token::DottedIdent(s) => write!(f, "DOTIDENT({})", s),
1002 Token::Comment => write!(f, "COMMENT"),
1003 Token::Newline => write!(f, "NEWLINE"),
1004 Token::LineContinuation => write!(f, "LINECONT"),
1005 Token::InvalidFloatNoLeading => write!(f, "INVALID_FLOAT_NO_LEADING"),
1007 Token::InvalidFloatNoTrailing => write!(f, "INVALID_FLOAT_NO_TRAILING"),
1008 Token::BacktickRejected => write!(f, "BACKTICK_REJECTED"),
1009 }
1010 }
1011}
1012
1013impl Token {
1014 pub fn is_keyword(&self) -> bool {
1019 matches!(
1020 self,
1021 Token::Set
1022 | Token::Local
1023 | Token::If
1024 | Token::Then
1025 | Token::Else
1026 | Token::Elif
1027 | Token::Fi
1028 | Token::For
1029 | Token::In
1030 | Token::Do
1031 | Token::Done
1032 | Token::While
1033 | Token::Case
1034 | Token::Esac
1035 | Token::Function
1036 | Token::Return
1037 | Token::Break
1038 | Token::Continue
1039 | Token::Exit
1040 | Token::True
1041 | Token::False
1042 )
1043 }
1044
1045 pub fn is_type(&self) -> bool {
1047 matches!(
1048 self,
1049 Token::TypeString
1050 | Token::TypeInt
1051 | Token::TypeFloat
1052 | Token::TypeBool
1053 )
1054 }
1055
1056 pub fn starts_statement(&self) -> bool {
1059 matches!(
1060 self,
1061 Token::Set
1062 | Token::Local
1063 | Token::Function
1064 | Token::If
1065 | Token::For
1066 | Token::While
1067 | Token::Case
1068 | Token::Ident(_)
1069 | Token::LBracket
1070 )
1071 }
1072
1073 pub fn is_value(&self) -> bool {
1075 matches!(
1076 self,
1077 Token::String(_)
1078 | Token::SingleString(_)
1079 | Token::HereDoc(_)
1080 | Token::Arithmetic(_)
1081 | Token::Int(_)
1082 | Token::Float(_)
1083 | Token::True
1084 | Token::False
1085 | Token::VarRef(_)
1086 | Token::SimpleVarRef(_)
1087 | Token::CmdSubstStart
1088 | Token::Path(_)
1089 | Token::GlobWord(_)
1090 | Token::LastExitCode
1091 | Token::CurrentPid
1092 )
1093 }
1094}
1095
/// Output of `preprocess_arithmetic`.
struct ArithmeticPreprocessResult {
    /// Source text with each arithmetic expansion replaced by a marker.
    text: String,
    /// `(marker, expression)` pairs, in the order the expansions were found.
    arithmetics: Vec<(String, String)>,
    /// One entry per inserted marker, recording where it sits and how its
    /// length differs from the text it replaced.
    replacements: Vec<SpanReplacement>,
}
1105
/// Copies one `$( ... )` command substitution verbatim into `result`.
///
/// On entry `chars[*i]` and `chars[*i + 1]` are expected to be `$` and `(`;
/// `$(` is written unconditionally. Scanning then continues until the
/// matching `)` has been copied or the input ends. Parentheses inside
/// single or double quotes, and characters escaped with a backslash, do not
/// affect nesting.
///
/// `*i` advances by characters and `*source_pos` by UTF-8 bytes, one step
/// for every character copied.
fn skip_command_substitution(
    chars: &[char],
    i: &mut usize,
    source_pos: &mut usize,
    result: &mut String,
) {
    /// Quoting context of the character being scanned.
    #[derive(Clone, Copy)]
    enum Quoting {
        Outside,
        Single,
        Double,
    }

    result.push_str("$(");
    *i += 2;
    *source_pos += 2;

    let mut quoting = Quoting::Outside;
    let mut open_parens: usize = 1;

    while open_parens > 0 {
        let Some(&c) = chars.get(*i) else { break };
        let following = chars.get(*i + 1).copied();

        // Number of characters consumed this round: 2 for a backslash
        // escape pair, otherwise 1.
        let mut width = 1;
        match quoting {
            Quoting::Single => {
                if c == '\'' {
                    quoting = Quoting::Outside;
                }
            }
            Quoting::Double => match (c, following) {
                ('\\', Some('"' | '\\' | '$' | '`')) => width = 2,
                ('"', _) => quoting = Quoting::Outside,
                _ => {}
            },
            Quoting::Outside => match c {
                '\'' => quoting = Quoting::Single,
                '"' => quoting = Quoting::Double,
                '\\' if following.is_some() => width = 2,
                '(' => open_parens += 1,
                ')' => open_parens -= 1,
                _ => {}
            },
        }

        for &copied in &chars[*i..*i + width] {
            result.push(copied);
            *source_pos += copied.len_utf8();
        }
        *i += width;
    }
}
1203
1204fn preprocess_arithmetic(source: &str) -> Result<ArithmeticPreprocessResult, LexerError> {
1218 let mut result = String::with_capacity(source.len());
1219 let mut arithmetics: Vec<(String, String)> = Vec::new();
1220 let mut replacements: Vec<SpanReplacement> = Vec::new();
1221 let mut source_pos: usize = 0;
1222 let chars_vec: Vec<char> = source.chars().collect();
1223 let mut i = 0;
1224
1225 let mut in_double_quote = false;
1228
1229 while i < chars_vec.len() {
1230 let ch = chars_vec[i];
1231
1232 if !in_double_quote && ch == '\\' && i + 1 < chars_vec.len() {
1234 result.push(ch);
1235 result.push(chars_vec[i + 1]);
1236 source_pos += ch.len_utf8() + chars_vec[i + 1].len_utf8();
1237 i += 2;
1238 continue;
1239 }
1240
1241 if ch == '\'' && !in_double_quote {
1243 result.push(ch);
1244 i += 1;
1245 source_pos += 1;
1246 while i < chars_vec.len() && chars_vec[i] != '\'' {
1247 result.push(chars_vec[i]);
1248 source_pos += chars_vec[i].len_utf8();
1249 i += 1;
1250 }
1251 if i < chars_vec.len() {
1252 result.push(chars_vec[i]); source_pos += 1;
1254 i += 1;
1255 }
1256 continue;
1257 }
1258
1259 if ch == '"' {
1261 in_double_quote = !in_double_quote;
1262 result.push(ch);
1263 i += 1;
1264 source_pos += 1;
1265 continue;
1266 }
1267
1268 if in_double_quote && ch == '\\' && i + 1 < chars_vec.len() {
1270 let next = chars_vec[i + 1];
1271 if next == '"' || next == '\\' || next == '$' || next == '`' {
1272 result.push(ch);
1273 result.push(next);
1274 source_pos += ch.len_utf8() + next.len_utf8();
1275 i += 2;
1276 continue;
1277 }
1278 }
1279
1280 if ch == '#' && !in_double_quote {
1288 while i < chars_vec.len() && chars_vec[i] != '\n' && chars_vec[i] != '\r' {
1289 result.push(chars_vec[i]);
1290 source_pos += chars_vec[i].len_utf8();
1291 i += 1;
1292 }
1293 continue;
1294 }
1295
1296 if ch == '$' && i + 1 < chars_vec.len() && chars_vec[i + 1] == '('
1298 && !(i + 2 < chars_vec.len() && chars_vec[i + 2] == '(')
1299 {
1300 skip_command_substitution(&chars_vec, &mut i, &mut source_pos, &mut result);
1301 continue;
1302 }
1303
1304 if ch == '$' && i + 2 < chars_vec.len() && chars_vec[i + 1] == '(' && chars_vec[i + 2] == '(' {
1306 let arith_start_pos = result.len();
1307 let original_start = source_pos;
1308
1309 i += 3;
1311 source_pos += 3;
1312
1313 let mut expr = String::new();
1315 let mut paren_depth: usize = 0;
1316
1317 while i < chars_vec.len() {
1318 let c = chars_vec[i];
1319 match c {
1320 '(' => {
1321 paren_depth += 1;
1322 if paren_depth > MAX_PAREN_DEPTH {
1323 return Err(LexerError::NestingTooDeep);
1324 }
1325 expr.push('(');
1326 i += 1;
1327 source_pos += c.len_utf8();
1328 }
1329 ')' => {
1330 if paren_depth > 0 {
1331 paren_depth -= 1;
1332 expr.push(')');
1333 i += 1;
1334 source_pos += 1;
1335 } else if i + 1 < chars_vec.len() && chars_vec[i + 1] == ')' {
1336 i += 2;
1338 source_pos += 2;
1339 break;
1340 } else {
1341 expr.push(')');
1343 i += 1;
1344 source_pos += 1;
1345 }
1346 }
1347 _ => {
1348 expr.push(c);
1349 i += 1;
1350 source_pos += c.len_utf8();
1351 }
1352 }
1353 }
1354
1355 let original_len = source_pos - original_start;
1357
1358 let marker = format!("__KAISH_ARITH_{}__", unique_marker_id());
1360 let marker_len = marker.len();
1361
1362 replacements.push(SpanReplacement {
1364 preprocessed_pos: arith_start_pos,
1365 marker_len,
1366 original_len,
1367 });
1368
1369 arithmetics.push((marker.clone(), expr));
1370 result.push_str(&marker);
1371 } else {
1372 result.push(ch);
1373 i += 1;
1374 source_pos += ch.len_utf8();
1375 }
1376 }
1377
1378 Ok(ArithmeticPreprocessResult {
1379 text: result,
1380 arithmetics,
1381 replacements,
1382 })
1383}
1384
/// One heredoc lifted out of the source by `preprocess_heredocs`.
#[derive(Debug, Clone)]
struct HeredocReplacement {
    /// Marker text written into the preprocessed source after `<<`.
    marker: String,
    /// Body lines, each kept with its original line ending; leading tabs
    /// are not removed here even when `strip_tabs` is set.
    body: String,
    /// True when the delimiter was written in single or double quotes.
    literal: bool,
    /// True when the heredoc was introduced with `<<-`.
    strip_tabs: bool,
    /// Byte offset in the original source where the body begins.
    body_start_offset: usize,
}
1407
1408fn preprocess_heredocs(source: &str) -> Result<(String, Vec<HeredocReplacement>), Spanned<LexerError>> {
1420 let mut result = String::with_capacity(source.len());
1421 let mut heredocs: Vec<HeredocReplacement> = Vec::new();
1422 let chars_vec: Vec<char> = source.chars().collect();
1423 let mut i = 0;
1424 let mut pos: usize = 0;
1428
1429 while i < chars_vec.len() {
1430 let ch = chars_vec[i];
1431
1432 if ch == '<'
1436 && chars_vec.get(i + 1) == Some(&'<')
1437 && chars_vec.get(i + 2) == Some(&'<')
1438 {
1439 result.push_str("<<<");
1440 i += 3;
1441 pos += 3;
1442 continue;
1443 }
1444
1445 if ch == '<' && chars_vec.get(i + 1) == Some(&'<') {
1447 let introducer_start = pos;
1450 i += 2; pos += 2;
1452
1453 let strip_tabs = chars_vec.get(i) == Some(&'-');
1455 if strip_tabs {
1456 i += 1;
1457 pos += 1;
1458 }
1459
1460 while let Some(&c) = chars_vec.get(i) {
1462 if c == ' ' || c == '\t' {
1463 i += 1;
1464 pos += 1;
1465 } else {
1466 break;
1467 }
1468 }
1469
1470 let mut delimiter = String::new();
1472 let quoted = chars_vec.get(i) == Some(&'\'') || chars_vec.get(i) == Some(&'"');
1473 let quote_char = if quoted {
1474 let q = chars_vec.get(i).copied();
1475 i += 1;
1476 pos += 1;
1477 q
1478 } else {
1479 None
1480 };
1481
1482 while let Some(&c) = chars_vec.get(i) {
1483 if quoted {
1484 if Some(c) == quote_char {
1485 i += 1; pos += 1;
1487 break;
1488 }
1489 } else if c.is_whitespace() || c == '\n' || c == '\r' {
1490 break;
1491 }
1492 delimiter.push(c);
1493 i += 1;
1494 pos += c.len_utf8();
1495 }
1496
1497 if delimiter.is_empty() {
1498 result.push_str("<<");
1500 if strip_tabs {
1501 result.push('-');
1502 }
1503 continue;
1504 }
1505
1506 let mut after_delimiter = String::new();
1509 while let Some(&c) = chars_vec.get(i) {
1510 if c == '\n' {
1511 i += 1;
1512 pos += 1;
1513 break;
1514 } else if c == '\r' {
1515 i += 1;
1516 pos += 1;
1517 if chars_vec.get(i) == Some(&'\n') {
1518 i += 1;
1519 pos += 1;
1520 }
1521 break;
1522 }
1523 after_delimiter.push(c);
1524 i += 1;
1525 pos += c.len_utf8();
1526 }
1527
1528 let body_start_offset = pos;
1534 let mut content = String::new();
1535 let mut current_line = String::new();
1536
1537 loop {
1538 let next = chars_vec.get(i).copied();
1539 match next {
1540 Some('\n') => {
1541 i += 1;
1542 pos += 1;
1543 let trimmed = if strip_tabs {
1545 current_line.trim_start_matches('\t')
1546 } else {
1547 ¤t_line
1548 };
1549 if trimmed == delimiter {
1550 break;
1552 }
1553 content.push_str(¤t_line);
1555 content.push('\n');
1556 current_line.clear();
1557 }
1558 Some('\r') => {
1559 i += 1;
1560 pos += 1;
1561 let crlf = chars_vec.get(i) == Some(&'\n');
1567 if crlf {
1568 i += 1;
1569 pos += 1;
1570 }
1571 let trimmed = if strip_tabs {
1572 current_line.trim_start_matches('\t')
1573 } else {
1574 ¤t_line
1575 };
1576 if trimmed == delimiter {
1577 break;
1578 }
1579 content.push_str(¤t_line);
1580 content.push_str(if crlf { "\r\n" } else { "\r" });
1581 current_line.clear();
1582 }
1583 Some(c) => {
1584 current_line.push(c);
1585 i += 1;
1586 pos += c.len_utf8();
1587 }
1588 None => {
1589 let trimmed = if strip_tabs {
1592 current_line.trim_start_matches('\t')
1593 } else {
1594 ¤t_line
1595 };
1596 if trimmed == delimiter {
1597 break;
1598 }
1599 let span_end = introducer_start
1604 + 2
1605 + if strip_tabs { 1 } else { 0 }
1606 + delimiter.len();
1607 return Err(Spanned::new(
1608 LexerError::UnterminatedHeredoc {
1609 delimiter: delimiter.clone(),
1610 },
1611 introducer_start..span_end,
1612 ));
1613 }
1614 }
1615 }
1616
1617 let marker = format!("__KAISH_HEREDOC_{}__", unique_marker_id());
1619 heredocs.push(HeredocReplacement {
1620 marker: marker.clone(),
1621 body: content,
1622 literal: quoted,
1623 strip_tabs,
1624 body_start_offset,
1625 });
1626
1627 result.push_str("<<");
1630 result.push_str(&marker);
1631 result.push_str(&after_delimiter);
1632 result.push('\n');
1633 } else {
1634 result.push(ch);
1635 i += 1;
1636 pos += ch.len_utf8();
1637 }
1638 }
1639
1640 Ok((result, heredocs))
1641}
1642
1643fn mergeable_text(token: &Token) -> Option<String> {
1648 match token {
1649 Token::Ident(s) => Some(s.clone()),
1650 Token::NumberIdent(s) => Some(s.clone()),
1651 Token::DottedIdent(s) => Some(s.clone()),
1652 Token::Colon => Some(":".to_string()),
1653 Token::Int(n) => Some(n.to_string()),
1654 Token::Path(p) => Some(p.clone()),
1655 Token::Float(f) => Some(f.to_string()),
1656 _ => None,
1657 }
1658}
1659
1660fn merge_colon_adjacent(tokens: Vec<Spanned<Token>>) -> Vec<Spanned<Token>> {
1669 if tokens.is_empty() {
1670 return tokens;
1671 }
1672
1673 let mut result = Vec::with_capacity(tokens.len());
1674 let mut run: Vec<&Spanned<Token>> = Vec::new();
1675
1676 for token in &tokens {
1677 if run.is_empty() {
1678 if mergeable_text(&token.token).is_some() {
1679 run.push(token);
1680 } else {
1681 result.push(token.clone());
1682 }
1683 continue;
1684 }
1685
1686 let Some(last) = run.last() else { unreachable!() };
1689 let adjacent = last.span.end == token.span.start;
1690
1691 if adjacent && mergeable_text(&token.token).is_some() {
1692 run.push(token);
1693 } else {
1694 flush_colon_run(&mut run, &mut result);
1695 if mergeable_text(&token.token).is_some() {
1696 run.push(token);
1697 } else {
1698 result.push(token.clone());
1699 }
1700 }
1701 }
1702
1703 flush_colon_run(&mut run, &mut result);
1704
1705 result
1706}
1707
1708fn flush_colon_run(run: &mut Vec<&Spanned<Token>>, result: &mut Vec<Spanned<Token>>) {
1710 if run.is_empty() {
1711 return;
1712 }
1713
1714 let has_colon = run.iter().any(|t| matches!(t.token, Token::Colon));
1715
1716 if run.len() >= 2 && has_colon {
1717 let text: String = run
1718 .iter()
1719 .filter_map(|t| mergeable_text(&t.token))
1720 .collect();
1721 let start = run.first().map(|t| t.span.start).unwrap_or(0);
1723 let end = run.last().map(|t| t.span.end).unwrap_or(0);
1724 result.push(Spanned::new(Token::Ident(text), start..end));
1725 } else {
1726 for t in run.iter() {
1727 result.push((*t).clone());
1728 }
1729 }
1730
1731 run.clear();
1732}
1733
1734fn glob_mergeable_text(token: &Token) -> Option<String> {
1739 match token {
1740 Token::Star => Some("*".to_string()),
1741 Token::Question => Some("?".to_string()),
1742 Token::Dot => Some(".".to_string()),
1743 Token::DotDot => Some("..".to_string()),
1744 Token::Ident(s) => Some(s.clone()),
1745 Token::NumberIdent(s) => Some(s.clone()),
1746 Token::DottedIdent(s) => Some(s.clone()),
1747 Token::Path(s) => Some(s.clone()),
1748 Token::Int(n) => Some(n.to_string()),
1749 Token::LBracket => Some("[".to_string()),
1750 Token::RBracket => Some("]".to_string()),
1751 Token::Bang => Some("!".to_string()),
1752 Token::DotSlashPath(s) => Some(s.clone()),
1753 Token::RelativePath(s) => Some(s.clone()),
1754 Token::TildePath(s) => Some(s.clone()),
1755 Token::Tilde => Some("~".to_string()),
1756 Token::LBrace => Some("{".to_string()),
1757 Token::RBrace => Some("}".to_string()),
1758 Token::Comma => Some(",".to_string()),
1759 _ => None,
1760 }
1761}
1762
1763fn merge_glob_adjacent(tokens: Vec<Spanned<Token>>) -> Vec<Spanned<Token>> {
1771 if tokens.is_empty() {
1772 return tokens;
1773 }
1774
1775 let mut result = Vec::with_capacity(tokens.len());
1776 let mut run: Vec<&Spanned<Token>> = Vec::new();
1777
1778 for token in &tokens {
1779 if run.is_empty() {
1780 if glob_mergeable_text(&token.token).is_some() {
1781 run.push(token);
1782 } else {
1783 result.push(token.clone());
1784 }
1785 continue;
1786 }
1787
1788 let Some(last) = run.last() else { unreachable!() };
1790 let adjacent = last.span.end == token.span.start;
1791
1792 if adjacent && glob_mergeable_text(&token.token).is_some() {
1793 run.push(token);
1794 } else {
1795 flush_glob_run(&mut run, &mut result);
1796 if glob_mergeable_text(&token.token).is_some() {
1797 run.push(token);
1798 } else {
1799 result.push(token.clone());
1800 }
1801 }
1802 }
1803
1804 flush_glob_run(&mut run, &mut result);
1805
1806 result
1807}
1808
1809fn flush_glob_run(run: &mut Vec<&Spanned<Token>>, result: &mut Vec<Spanned<Token>>) {
1811 if run.is_empty() {
1812 return;
1813 }
1814
1815 let has_glob = run.iter().any(|t| {
1816 matches!(t.token, Token::Star | Token::Question)
1817 }) || (run.iter().any(|t| matches!(t.token, Token::LBracket))
1818 && run.iter().any(|t| matches!(t.token, Token::RBracket)));
1819
1820 if run.len() >= 2 && has_glob {
1821 let text: String = run
1822 .iter()
1823 .filter_map(|t| glob_mergeable_text(&t.token))
1824 .collect();
1825 let start = run.first().map(|t| t.span.start).unwrap_or(0);
1826 let end = run.last().map(|t| t.span.end).unwrap_or(0);
1827 result.push(Spanned::new(Token::GlobWord(text), start..end));
1828 } else {
1829 for t in run.iter() {
1830 result.push((*t).clone());
1831 }
1832 }
1833
1834 run.clear();
1835}
1836
1837pub fn tokenize(source: &str) -> Result<Vec<Spanned<Token>>, Vec<Spanned<LexerError>>> {
1847 let arith_result = preprocess_arithmetic(source)
1849 .map_err(|e| vec![Spanned::new(e, 0..source.len())])?;
1850
1851 let span_replacements = arith_result.replacements;
1855 let (preprocessed, heredocs) = preprocess_heredocs(&arith_result.text)
1856 .map_err(|e| {
1857 let span = correct_span(e.span, &span_replacements);
1858 vec![Spanned::new(e.token, span)]
1859 })?;
1860
1861 let lexer = Token::lexer(&preprocessed);
1862 let mut tokens = Vec::new();
1863 let mut errors = Vec::new();
1864
1865 for (result, span) in lexer.spanned() {
1866 let corrected_span = correct_span(span, &span_replacements);
1868 match result {
1869 Ok(token) => {
1870 if !matches!(token, Token::Comment | Token::LineContinuation) {
1872 tokens.push(Spanned::new(token, corrected_span));
1873 }
1874 }
1875 Err(err) => {
1876 errors.push(Spanned::new(err, corrected_span));
1877 }
1878 }
1879 }
1880
1881 if !errors.is_empty() {
1882 return Err(errors);
1883 }
1884
1885 let mut final_tokens = Vec::with_capacity(tokens.len());
1887 let mut i = 0;
1888
1889 while i < tokens.len() {
1890 if let Token::Ident(ref name) = tokens[i].token
1892 && name.starts_with("__KAISH_ARITH_") && name.ends_with("__")
1893 && let Some((_, expr)) = arith_result.arithmetics.iter().find(|(marker, _)| marker == name) {
1894 final_tokens.push(Spanned::new(Token::Arithmetic(expr.clone()), tokens[i].span.clone()));
1895 i += 1;
1896 continue;
1897 }
1898
1899 if matches!(tokens[i].token, Token::HereDocStart) {
1901 if i + 1 < tokens.len()
1903 && let Token::Ident(ref name) = tokens[i + 1].token
1904 && name.starts_with("__KAISH_HEREDOC_") && name.ends_with("__") {
1905 if let Some(hd) = heredocs.iter().find(|h| h.marker == *name) {
1907 let mut content = hd.body.clone();
1919 for (marker, expr) in &arith_result.arithmetics {
1920 if content.contains(marker) {
1921 let replacement = if hd.literal {
1922 format!("$(({}))", expr)
1923 } else {
1924 format!("${{__ARITH:{}__}}", expr)
1925 };
1926 content = content.replace(marker, &replacement);
1927 }
1928 }
1929 final_tokens.push(Spanned::new(Token::HereDocStart, tokens[i].span.clone()));
1930 final_tokens.push(Spanned::new(
1931 Token::HereDoc(HereDocData {
1932 content,
1933 literal: hd.literal,
1934 strip_tabs: hd.strip_tabs,
1935 body_start_offset: hd.body_start_offset,
1936 }),
1937 tokens[i + 1].span.clone(),
1938 ));
1939 i += 2;
1940 continue;
1941 }
1942 }
1943 }
1944
1945 let token = if let Token::String(ref s) = tokens[i].token {
1947 let mut new_content = s.clone();
1949 for (marker, expr) in &arith_result.arithmetics {
1950 if new_content.contains(marker) {
1951 new_content = new_content.replace(marker, &format!("${{__ARITH:{}__}}", expr));
1954 }
1955 }
1956 if new_content != *s {
1957 Spanned::new(Token::String(new_content), tokens[i].span.clone())
1958 } else {
1959 tokens[i].clone()
1960 }
1961 } else {
1962 tokens[i].clone()
1963 };
1964 final_tokens.push(token);
1965 i += 1;
1966 }
1967
1968 Ok(merge_glob_adjacent(merge_colon_adjacent(final_tokens)))
1969}
1970
1971pub fn tokenize_with_comments(source: &str) -> Result<Vec<Spanned<Token>>, Vec<Spanned<LexerError>>> {
1975 let lexer = Token::lexer(source);
1976 let mut tokens = Vec::new();
1977 let mut errors = Vec::new();
1978
1979 for (result, span) in lexer.spanned() {
1980 match result {
1981 Ok(token) => {
1982 tokens.push(Spanned::new(token, span));
1983 }
1984 Err(err) => {
1985 errors.push(Spanned::new(err, span));
1986 }
1987 }
1988 }
1989
1990 if errors.is_empty() {
1991 Ok(tokens)
1992 } else {
1993 Err(errors)
1994 }
1995}
1996
1997pub fn parse_string_literal(source: &str) -> Result<String, LexerError> {
1999 if source.len() < 2 || !source.starts_with('"') || !source.ends_with('"') {
2001 return Err(LexerError::UnterminatedString);
2002 }
2003
2004 let inner = &source[1..source.len() - 1];
2005 let mut result = String::with_capacity(inner.len());
2006 let mut chars = inner.chars().peekable();
2007
2008 while let Some(ch) = chars.next() {
2009 if ch == '\\' {
2010 match chars.next() {
2011 Some('n') => result.push('\n'),
2012 Some('t') => result.push('\t'),
2013 Some('r') => result.push('\r'),
2014 Some('\\') => result.push('\\'),
2015 Some('"') => result.push('"'),
2016 Some('$') => result.push_str("__KAISH_ESCAPED_DOLLAR__"),
2019 Some('u') => {
2020 let mut hex = String::with_capacity(4);
2022 for _ in 0..4 {
2023 match chars.next() {
2024 Some(h) if h.is_ascii_hexdigit() => hex.push(h),
2025 _ => return Err(LexerError::InvalidEscape),
2026 }
2027 }
2028 let codepoint = u32::from_str_radix(&hex, 16)
2029 .map_err(|_| LexerError::InvalidEscape)?;
2030 let ch = char::from_u32(codepoint)
2031 .ok_or(LexerError::InvalidEscape)?;
2032 result.push(ch);
2033 }
2034 Some(next) => {
2036 result.push('\\');
2037 result.push(next);
2038 }
2039 None => return Err(LexerError::InvalidEscape),
2040 }
2041 } else {
2042 result.push(ch);
2043 }
2044 }
2045
2046 Ok(result)
2047}
2048
2049pub fn parse_var_ref(source: &str) -> Result<Vec<String>, LexerError> {
2052 if source.len() < 4 || !source.starts_with("${") || !source.ends_with('}') {
2054 return Err(LexerError::UnterminatedVarRef);
2055 }
2056
2057 let inner = &source[2..source.len() - 1];
2058
2059 if inner == "?" {
2061 return Ok(vec!["?".to_string()]);
2062 }
2063
2064 let mut segments = Vec::new();
2065 let mut current = String::new();
2066 let mut chars = inner.chars().peekable();
2067
2068 while let Some(ch) = chars.next() {
2069 match ch {
2070 '.' => {
2071 if !current.is_empty() {
2072 segments.push(current.clone());
2073 current.clear();
2074 }
2075 }
2076 '[' => {
2077 if !current.is_empty() {
2078 segments.push(current.clone());
2079 current.clear();
2080 }
2081 let mut index = String::from("[");
2083 while let Some(&c) = chars.peek() {
2084 if let Some(c) = chars.next() {
2085 index.push(c);
2086 }
2087 if c == ']' {
2088 break;
2089 }
2090 }
2091 segments.push(index);
2092 }
2093 _ => {
2094 current.push(ch);
2095 }
2096 }
2097 }
2098
2099 if !current.is_empty() {
2100 segments.push(current);
2101 }
2102
2103 Ok(segments)
2104}
2105
2106pub fn parse_int(source: &str) -> Result<i64, LexerError> {
2108 source.parse().map_err(|_| LexerError::InvalidNumber)
2109}
2110
2111pub fn parse_float(source: &str) -> Result<f64, LexerError> {
2113 source.parse().map_err(|_| LexerError::InvalidNumber)
2114}
2115
2116#[cfg(test)]
2117#[allow(clippy::approx_constant)]
2118mod tests {
2119 use super::*;
2120
2121 fn lex(source: &str) -> Vec<Token> {
2122 tokenize(source)
2123 .expect("lexer should succeed")
2124 .into_iter()
2125 .map(|s| s.token)
2126 .collect()
2127 }
2128
2129 #[test]
2134 fn keywords() {
2135 assert_eq!(lex("set"), vec![Token::Set]);
2136 assert_eq!(lex("if"), vec![Token::If]);
2137 assert_eq!(lex("then"), vec![Token::Then]);
2138 assert_eq!(lex("else"), vec![Token::Else]);
2139 assert_eq!(lex("elif"), vec![Token::Elif]);
2140 assert_eq!(lex("fi"), vec![Token::Fi]);
2141 assert_eq!(lex("for"), vec![Token::For]);
2142 assert_eq!(lex("in"), vec![Token::In]);
2143 assert_eq!(lex("do"), vec![Token::Do]);
2144 assert_eq!(lex("done"), vec![Token::Done]);
2145 assert_eq!(lex("case"), vec![Token::Case]);
2146 assert_eq!(lex("esac"), vec![Token::Esac]);
2147 assert_eq!(lex("function"), vec![Token::Function]);
2148 assert_eq!(lex("true"), vec![Token::True]);
2149 assert_eq!(lex("false"), vec![Token::False]);
2150 }
2151
2152 #[test]
2153 fn double_semicolon() {
2154 assert_eq!(lex(";;"), vec![Token::DoubleSemi]);
2155 assert_eq!(lex("echo \"hi\";;"), vec![
2157 Token::Ident("echo".to_string()),
2158 Token::String("hi".to_string()),
2159 Token::DoubleSemi,
2160 ]);
2161 }
2162
2163 #[test]
2164 fn type_keywords() {
2165 assert_eq!(lex("string"), vec![Token::TypeString]);
2166 assert_eq!(lex("int"), vec![Token::TypeInt]);
2167 assert_eq!(lex("float"), vec![Token::TypeFloat]);
2168 assert_eq!(lex("bool"), vec![Token::TypeBool]);
2169 }
2170
2171 #[test]
2176 fn single_char_operators() {
2177 assert_eq!(lex("="), vec![Token::Eq]);
2178 assert_eq!(lex("|"), vec![Token::Pipe]);
2179 assert_eq!(lex("&"), vec![Token::Amp]);
2180 assert_eq!(lex(">"), vec![Token::Gt]);
2181 assert_eq!(lex("<"), vec![Token::Lt]);
2182 assert_eq!(lex(";"), vec![Token::Semi]);
2183 assert_eq!(lex(":"), vec![Token::Colon]);
2184 assert_eq!(lex(","), vec![Token::Comma]);
2185 assert_eq!(lex("."), vec![Token::Dot]);
2186 }
2187
2188 #[test]
2189 fn multi_char_operators() {
2190 assert_eq!(lex("&&"), vec![Token::And]);
2191 assert_eq!(lex("||"), vec![Token::Or]);
2192 assert_eq!(lex("=="), vec![Token::EqEq]);
2193 assert_eq!(lex("!="), vec![Token::NotEq]);
2194 assert_eq!(lex("=~"), vec![Token::Match]);
2195 assert_eq!(lex("!~"), vec![Token::NotMatch]);
2196 assert_eq!(lex(">="), vec![Token::GtEq]);
2197 assert_eq!(lex("<="), vec![Token::LtEq]);
2198 assert_eq!(lex(">>"), vec![Token::GtGt]);
2199 assert_eq!(lex("2>"), vec![Token::Stderr]);
2200 assert_eq!(lex("&>"), vec![Token::Both]);
2201 }
2202
2203 #[test]
2204 fn brackets() {
2205 assert_eq!(lex("{"), vec![Token::LBrace]);
2206 assert_eq!(lex("}"), vec![Token::RBrace]);
2207 assert_eq!(lex("["), vec![Token::LBracket]);
2208 assert_eq!(lex("]"), vec![Token::RBracket]);
2209 assert_eq!(lex("("), vec![Token::LParen]);
2210 assert_eq!(lex(")"), vec![Token::RParen]);
2211 }
2212
2213 #[test]
2218 fn integers() {
2219 assert_eq!(lex("0"), vec![Token::Int(0)]);
2220 assert_eq!(lex("42"), vec![Token::Int(42)]);
2221 assert_eq!(lex("-1"), vec![Token::Int(-1)]);
2222 assert_eq!(lex("999999"), vec![Token::Int(999999)]);
2223 }
2224
2225 #[test]
2226 fn floats() {
2227 assert_eq!(lex("3.14"), vec![Token::Float(3.14)]);
2228 assert_eq!(lex("-0.5"), vec![Token::Float(-0.5)]);
2229 assert_eq!(lex("123.456"), vec![Token::Float(123.456)]);
2230 }
2231
2232 #[test]
2233 fn strings() {
2234 assert_eq!(lex(r#""hello""#), vec![Token::String("hello".to_string())]);
2235 assert_eq!(lex(r#""hello world""#), vec![Token::String("hello world".to_string())]);
2236 assert_eq!(lex(r#""""#), vec![Token::String("".to_string())]); assert_eq!(lex(r#""with \"quotes\"""#), vec![Token::String("with \"quotes\"".to_string())]);
2238 assert_eq!(lex(r#""with\nnewline""#), vec![Token::String("with\nnewline".to_string())]);
2239 }
2240
2241 #[test]
2242 fn var_refs() {
2243 assert_eq!(lex("${X}"), vec![Token::VarRef("${X}".to_string())]);
2244 assert_eq!(lex("${VAR}"), vec![Token::VarRef("${VAR}".to_string())]);
2245 assert_eq!(lex("${VAR.field}"), vec![Token::VarRef("${VAR.field}".to_string())]);
2246 assert_eq!(lex("${VAR[0]}"), vec![Token::VarRef("${VAR[0]}".to_string())]);
2247 }
2248
2249 #[test]
2254 fn identifiers() {
2255 assert_eq!(lex("foo"), vec![Token::Ident("foo".to_string())]);
2256 assert_eq!(lex("foo_bar"), vec![Token::Ident("foo_bar".to_string())]);
2257 assert_eq!(lex("foo-bar"), vec![Token::Ident("foo-bar".to_string())]);
2258 assert_eq!(lex("_private"), vec![Token::Ident("_private".to_string())]);
2259 assert_eq!(lex("cmd123"), vec![Token::Ident("cmd123".to_string())]);
2260 }
2261
2262 #[test]
2263 fn keyword_prefix_identifiers() {
2264 assert_eq!(lex("setup"), vec![Token::Ident("setup".to_string())]);
2266 assert_eq!(lex("kaish-tools"), vec![Token::Ident("kaish-tools".to_string())]);
2267 assert_eq!(lex("iffy"), vec![Token::Ident("iffy".to_string())]);
2268 assert_eq!(lex("forked"), vec![Token::Ident("forked".to_string())]);
2269 assert_eq!(lex("done-with-it"), vec![Token::Ident("done-with-it".to_string())]);
2270 }
2271
2272 #[test]
2277 fn assignment() {
2278 assert_eq!(
2279 lex("set X = 5"),
2280 vec![Token::Set, Token::Ident("X".to_string()), Token::Eq, Token::Int(5)]
2281 );
2282 }
2283
2284 #[test]
2285 fn command_simple() {
2286 assert_eq!(lex("echo"), vec![Token::Ident("echo".to_string())]);
2287 assert_eq!(
2288 lex(r#"echo "hello""#),
2289 vec![Token::Ident("echo".to_string()), Token::String("hello".to_string())]
2290 );
2291 }
2292
2293 #[test]
2294 fn command_with_args() {
2295 assert_eq!(
2296 lex("cmd arg1 arg2"),
2297 vec![Token::Ident("cmd".to_string()), Token::Ident("arg1".to_string()), Token::Ident("arg2".to_string())]
2298 );
2299 }
2300
2301 #[test]
2302 fn command_with_named_args() {
2303 assert_eq!(
2304 lex("cmd key=value"),
2305 vec![Token::Ident("cmd".to_string()), Token::Ident("key".to_string()), Token::Eq, Token::Ident("value".to_string())]
2306 );
2307 }
2308
2309 #[test]
2310 fn pipeline() {
2311 assert_eq!(
2312 lex("a | b | c"),
2313 vec![Token::Ident("a".to_string()), Token::Pipe, Token::Ident("b".to_string()), Token::Pipe, Token::Ident("c".to_string())]
2314 );
2315 }
2316
2317 #[test]
2318 fn if_statement() {
2319 assert_eq!(
2320 lex("if true; then echo; fi"),
2321 vec![
2322 Token::If,
2323 Token::True,
2324 Token::Semi,
2325 Token::Then,
2326 Token::Ident("echo".to_string()),
2327 Token::Semi,
2328 Token::Fi
2329 ]
2330 );
2331 }
2332
2333 #[test]
2334 fn for_loop() {
2335 assert_eq!(
2336 lex("for X in items; do echo; done"),
2337 vec![
2338 Token::For,
2339 Token::Ident("X".to_string()),
2340 Token::In,
2341 Token::Ident("items".to_string()),
2342 Token::Semi,
2343 Token::Do,
2344 Token::Ident("echo".to_string()),
2345 Token::Semi,
2346 Token::Done
2347 ]
2348 );
2349 }
2350
2351 #[test]
2356 fn whitespace_ignored() {
2357 assert_eq!(lex(" set X = 5 "), lex("set X = 5"));
2358 }
2359
2360 #[test]
2361 fn newlines_preserved() {
2362 let tokens = lex("a\nb");
2363 assert_eq!(
2364 tokens,
2365 vec![Token::Ident("a".to_string()), Token::Newline, Token::Ident("b".to_string())]
2366 );
2367 }
2368
2369 #[test]
2370 fn multiple_newlines() {
2371 let tokens = lex("a\n\n\nb");
2372 assert_eq!(
2373 tokens,
2374 vec![Token::Ident("a".to_string()), Token::Newline, Token::Newline, Token::Newline, Token::Ident("b".to_string())]
2375 );
2376 }
2377
2378 #[test]
2383 fn comments_skipped() {
2384 assert_eq!(lex("# comment"), vec![]);
2385 assert_eq!(lex("a # comment"), vec![Token::Ident("a".to_string())]);
2386 assert_eq!(
2387 lex("a # comment\nb"),
2388 vec![Token::Ident("a".to_string()), Token::Newline, Token::Ident("b".to_string())]
2389 );
2390 }
2391
2392 #[test]
2393 fn comments_preserved_when_requested() {
2394 let tokens = tokenize_with_comments("a # comment")
2395 .expect("should succeed")
2396 .into_iter()
2397 .map(|s| s.token)
2398 .collect::<Vec<_>>();
2399 assert_eq!(tokens, vec![Token::Ident("a".to_string()), Token::Comment]);
2400 }
2401
2402 #[test]
2407 fn parse_simple_string() {
2408 assert_eq!(parse_string_literal(r#""hello""#).expect("ok"), "hello");
2409 }
2410
2411 #[test]
2412 fn parse_string_with_escapes() {
2413 assert_eq!(
2414 parse_string_literal(r#""hello\nworld""#).expect("ok"),
2415 "hello\nworld"
2416 );
2417 assert_eq!(
2418 parse_string_literal(r#""tab\there""#).expect("ok"),
2419 "tab\there"
2420 );
2421 assert_eq!(
2422 parse_string_literal(r#""quote\"here""#).expect("ok"),
2423 "quote\"here"
2424 );
2425 }
2426
2427 #[test]
2428 fn parse_string_with_unicode() {
2429 assert_eq!(
2430 parse_string_literal(r#""emoji \u2764""#).expect("ok"),
2431 "emoji ❤"
2432 );
2433 }
2434
2435 #[test]
2436 fn parse_string_with_escaped_dollar() {
2437 assert_eq!(
2440 parse_string_literal(r#""\$VAR""#).expect("ok"),
2441 "__KAISH_ESCAPED_DOLLAR__VAR"
2442 );
2443 assert_eq!(
2444 parse_string_literal(r#""cost: \$100""#).expect("ok"),
2445 "cost: __KAISH_ESCAPED_DOLLAR__100"
2446 );
2447 }
2448
2449 #[test]
2454 fn parse_simple_var() {
2455 assert_eq!(
2456 parse_var_ref("${X}").expect("ok"),
2457 vec!["X"]
2458 );
2459 }
2460
2461 #[test]
2462 fn parse_var_with_field() {
2463 assert_eq!(
2464 parse_var_ref("${VAR.field}").expect("ok"),
2465 vec!["VAR", "field"]
2466 );
2467 }
2468
2469 #[test]
2470 fn parse_var_with_index() {
2471 assert_eq!(
2472 parse_var_ref("${VAR[0]}").expect("ok"),
2473 vec!["VAR", "[0]"]
2474 );
2475 }
2476
2477 #[test]
2478 fn parse_var_nested() {
2479 assert_eq!(
2480 parse_var_ref("${VAR.field[0].nested}").expect("ok"),
2481 vec!["VAR", "field", "[0]", "nested"]
2482 );
2483 }
2484
2485 #[test]
2486 fn parse_last_result() {
2487 assert_eq!(
2488 parse_var_ref("${?}").expect("ok"),
2489 vec!["?"]
2490 );
2491 }
2492
2493 #[test]
2498 fn parse_integers() {
2499 assert_eq!(parse_int("0").expect("ok"), 0);
2500 assert_eq!(parse_int("42").expect("ok"), 42);
2501 assert_eq!(parse_int("-1").expect("ok"), -1);
2502 }
2503
2504 #[test]
2505 fn parse_floats() {
2506 assert!((parse_float("3.14").expect("ok") - 3.14).abs() < f64::EPSILON);
2507 assert!((parse_float("-0.5").expect("ok") - (-0.5)).abs() < f64::EPSILON);
2508 }
2509
2510 #[test]
2515 fn empty_input() {
2516 assert_eq!(lex(""), vec![]);
2517 }
2518
2519 #[test]
2520 fn only_whitespace() {
2521 assert_eq!(lex(" \t\t "), vec![]);
2522 }
2523
2524 #[test]
2525 fn json_array() {
2526 assert_eq!(
2527 lex(r#"[1, 2, 3]"#),
2528 vec![
2529 Token::LBracket,
2530 Token::Int(1),
2531 Token::Comma,
2532 Token::Int(2),
2533 Token::Comma,
2534 Token::Int(3),
2535 Token::RBracket
2536 ]
2537 );
2538 }
2539
2540 #[test]
2541 fn json_object() {
2542 assert_eq!(
2543 lex(r#"{"key": "value"}"#),
2544 vec![
2545 Token::LBrace,
2546 Token::String("key".to_string()),
2547 Token::Colon,
2548 Token::String("value".to_string()),
2549 Token::RBrace
2550 ]
2551 );
2552 }
2553
2554 #[test]
2555 fn redirect_operators() {
2556 assert_eq!(
2557 lex("cmd > file"),
2558 vec![Token::Ident("cmd".to_string()), Token::Gt, Token::Ident("file".to_string())]
2559 );
2560 assert_eq!(
2561 lex("cmd >> file"),
2562 vec![Token::Ident("cmd".to_string()), Token::GtGt, Token::Ident("file".to_string())]
2563 );
2564 assert_eq!(
2565 lex("cmd 2> err"),
2566 vec![Token::Ident("cmd".to_string()), Token::Stderr, Token::Ident("err".to_string())]
2567 );
2568 assert_eq!(
2569 lex("cmd &> all"),
2570 vec![Token::Ident("cmd".to_string()), Token::Both, Token::Ident("all".to_string())]
2571 );
2572 }
2573
2574 #[test]
2575 fn background_job() {
2576 assert_eq!(
2577 lex("cmd &"),
2578 vec![Token::Ident("cmd".to_string()), Token::Amp]
2579 );
2580 }
2581
2582 #[test]
2583 fn command_substitution() {
2584 assert_eq!(
2585 lex("$(cmd)"),
2586 vec![Token::CmdSubstStart, Token::Ident("cmd".to_string()), Token::RParen]
2587 );
2588 assert_eq!(
2589 lex("$(cmd arg)"),
2590 vec![
2591 Token::CmdSubstStart,
2592 Token::Ident("cmd".to_string()),
2593 Token::Ident("arg".to_string()),
2594 Token::RParen
2595 ]
2596 );
2597 assert_eq!(
2598 lex("$(a | b)"),
2599 vec![
2600 Token::CmdSubstStart,
2601 Token::Ident("a".to_string()),
2602 Token::Pipe,
2603 Token::Ident("b".to_string()),
2604 Token::RParen
2605 ]
2606 );
2607 }
2608
2609 #[test]
2610 fn complex_pipeline() {
2611 assert_eq!(
2612 lex(r#"cat file | grep pattern="foo" | head count=10"#),
2613 vec![
2614 Token::Ident("cat".to_string()),
2615 Token::Ident("file".to_string()),
2616 Token::Pipe,
2617 Token::Ident("grep".to_string()),
2618 Token::Ident("pattern".to_string()),
2619 Token::Eq,
2620 Token::String("foo".to_string()),
2621 Token::Pipe,
2622 Token::Ident("head".to_string()),
2623 Token::Ident("count".to_string()),
2624 Token::Eq,
2625 Token::Int(10),
2626 ]
2627 );
2628 }
2629
2630 #[test]
2635 fn short_flag() {
2636 assert_eq!(lex("-l"), vec![Token::ShortFlag("l".to_string())]);
2637 assert_eq!(lex("-a"), vec![Token::ShortFlag("a".to_string())]);
2638 assert_eq!(lex("-v"), vec![Token::ShortFlag("v".to_string())]);
2639 }
2640
2641 #[test]
2642 fn short_flag_combined() {
2643 assert_eq!(lex("-la"), vec![Token::ShortFlag("la".to_string())]);
2645 assert_eq!(lex("-vvv"), vec![Token::ShortFlag("vvv".to_string())]);
2646 }
2647
2648 #[test]
2649 fn job_spec_lexes_as_one_token() {
2650 assert_eq!(lex("%1"), vec![Token::JobSpec("%1".to_string())]);
2652 assert_eq!(lex("%12"), vec![Token::JobSpec("%12".to_string())]);
2653 assert_eq!(
2654 lex("wait %1 %2"),
2655 vec![
2656 Token::Ident("wait".to_string()),
2657 Token::JobSpec("%1".to_string()),
2658 Token::JobSpec("%2".to_string()),
2659 ]
2660 );
2661 }
2662
2663 #[test]
2664 fn short_flag_with_internal_hyphens_is_one_token() {
2665 assert_eq!(
2669 lex("-not-a-flag"),
2670 vec![Token::ShortFlag("not-a-flag".to_string())]
2671 );
2672 assert_eq!(lex("--"), vec![Token::DoubleDash]);
2676 assert_eq!(lex("-"), vec![Token::MinusAlone]);
2677 }
2678
2679 #[test]
2680 fn long_flag() {
2681 assert_eq!(lex("--force"), vec![Token::LongFlag("force".to_string())]);
2682 assert_eq!(lex("--verbose"), vec![Token::LongFlag("verbose".to_string())]);
2683 assert_eq!(lex("--foo-bar"), vec![Token::LongFlag("foo-bar".to_string())]);
2684 }
2685
2686 #[test]
2687 fn double_dash() {
2688 assert_eq!(lex("--"), vec![Token::DoubleDash]);
2690 }
2691
2692 #[test]
2693 fn flags_vs_negative_numbers() {
2694 assert_eq!(lex("-123"), vec![Token::Int(-123)]);
2696 assert_eq!(lex("-l"), vec![Token::ShortFlag("l".to_string())]);
2698 assert_eq!(
2701 lex("-1 a"),
2702 vec![Token::Int(-1), Token::Ident("a".to_string())]
2703 );
2704 }
2705
2706 #[test]
2707 fn command_with_flags() {
2708 assert_eq!(
2709 lex("ls -l"),
2710 vec![
2711 Token::Ident("ls".to_string()),
2712 Token::ShortFlag("l".to_string()),
2713 ]
2714 );
2715 assert_eq!(
2716 lex("git commit -m"),
2717 vec![
2718 Token::Ident("git".to_string()),
2719 Token::Ident("commit".to_string()),
2720 Token::ShortFlag("m".to_string()),
2721 ]
2722 );
2723 assert_eq!(
2724 lex("git push --force"),
2725 vec![
2726 Token::Ident("git".to_string()),
2727 Token::Ident("push".to_string()),
2728 Token::LongFlag("force".to_string()),
2729 ]
2730 );
2731 }
2732
2733 #[test]
2734 fn flag_with_value() {
2735 assert_eq!(
2736 lex(r#"git commit -m "message""#),
2737 vec![
2738 Token::Ident("git".to_string()),
2739 Token::Ident("commit".to_string()),
2740 Token::ShortFlag("m".to_string()),
2741 Token::String("message".to_string()),
2742 ]
2743 );
2744 assert_eq!(
2745 lex(r#"--message="hello""#),
2746 vec![
2747 Token::LongFlag("message".to_string()),
2748 Token::Eq,
2749 Token::String("hello".to_string()),
2750 ]
2751 );
2752 }
2753
2754 #[test]
2755 fn end_of_flags_marker() {
2756 assert_eq!(
2757 lex("git checkout -- file"),
2758 vec![
2759 Token::Ident("git".to_string()),
2760 Token::Ident("checkout".to_string()),
2761 Token::DoubleDash,
2762 Token::Ident("file".to_string()),
2763 ]
2764 );
2765 }
2766
2767 #[test]
2772 fn local_keyword() {
2773 assert_eq!(lex("local"), vec![Token::Local]);
2774 assert_eq!(
2775 lex("local X = 5"),
2776 vec![Token::Local, Token::Ident("X".to_string()), Token::Eq, Token::Int(5)]
2777 );
2778 }
2779
2780 #[test]
2781 fn simple_var_ref() {
2782 assert_eq!(lex("$X"), vec![Token::SimpleVarRef("X".to_string())]);
2783 assert_eq!(lex("$foo"), vec![Token::SimpleVarRef("foo".to_string())]);
2784 assert_eq!(lex("$foo_bar"), vec![Token::SimpleVarRef("foo_bar".to_string())]);
2785 assert_eq!(lex("$_private"), vec![Token::SimpleVarRef("_private".to_string())]);
2786 }
2787
2788 #[test]
2789 fn simple_var_ref_in_command() {
2790 assert_eq!(
2791 lex("echo $NAME"),
2792 vec![Token::Ident("echo".to_string()), Token::SimpleVarRef("NAME".to_string())]
2793 );
2794 }
2795
2796 #[test]
2797 fn single_quoted_strings() {
2798 assert_eq!(lex("'hello'"), vec![Token::SingleString("hello".to_string())]);
2799 assert_eq!(lex("'hello world'"), vec![Token::SingleString("hello world".to_string())]);
2800 assert_eq!(lex("''"), vec![Token::SingleString("".to_string())]);
2801 assert_eq!(lex(r"'no $VAR here'"), vec![Token::SingleString("no $VAR here".to_string())]);
2803 assert_eq!(lex(r"'backslash \n stays'"), vec![Token::SingleString(r"backslash \n stays".to_string())]);
2804 }
2805
2806 #[test]
2807 fn test_brackets() {
2808 assert_eq!(lex("[["), vec![Token::LBracket, Token::LBracket]);
2810 assert_eq!(lex("]]"), vec![Token::RBracket, Token::RBracket]);
2811 assert_eq!(
2812 lex("[[ -f file ]]"),
2813 vec![
2814 Token::LBracket,
2815 Token::LBracket,
2816 Token::ShortFlag("f".to_string()),
2817 Token::Ident("file".to_string()),
2818 Token::RBracket,
2819 Token::RBracket
2820 ]
2821 );
2822 }
2823
2824 #[test]
2825 fn test_expression_syntax() {
2826 assert_eq!(
2827 lex(r#"[[ $X == "value" ]]"#),
2828 vec![
2829 Token::LBracket,
2830 Token::LBracket,
2831 Token::SimpleVarRef("X".to_string()),
2832 Token::EqEq,
2833 Token::String("value".to_string()),
2834 Token::RBracket,
2835 Token::RBracket
2836 ]
2837 );
2838 }
2839
2840 #[test]
2841 fn bash_style_assignment() {
2842 assert_eq!(
2844 lex(r#"NAME="value""#),
2845 vec![
2846 Token::Ident("NAME".to_string()),
2847 Token::Eq,
2848 Token::String("value".to_string())
2849 ]
2850 );
2851 }
2852
2853 #[test]
2854 fn positional_params() {
2855 assert_eq!(lex("$0"), vec![Token::Positional(0)]);
2856 assert_eq!(lex("$1"), vec![Token::Positional(1)]);
2857 assert_eq!(lex("$9"), vec![Token::Positional(9)]);
2858 assert_eq!(lex("$@"), vec![Token::AllArgs]);
2859 assert_eq!(lex("$#"), vec![Token::ArgCount]);
2860 }
2861
2862 #[test]
2863 fn positional_in_context() {
2864 assert_eq!(
2865 lex("echo $1 $2"),
2866 vec![
2867 Token::Ident("echo".to_string()),
2868 Token::Positional(1),
2869 Token::Positional(2),
2870 ]
2871 );
2872 }
2873
2874 #[test]
2875 fn var_length() {
2876 assert_eq!(lex("${#X}"), vec![Token::VarLength("X".to_string())]);
2877 assert_eq!(lex("${#NAME}"), vec![Token::VarLength("NAME".to_string())]);
2878 assert_eq!(lex("${#foo_bar}"), vec![Token::VarLength("foo_bar".to_string())]);
2879 }
2880
2881 #[test]
2882 fn var_length_in_context() {
2883 assert_eq!(
2884 lex("echo ${#NAME}"),
2885 vec![
2886 Token::Ident("echo".to_string()),
2887 Token::VarLength("NAME".to_string()),
2888 ]
2889 );
2890 }
2891
2892 #[test]
2897 fn plus_flag() {
2898 assert_eq!(lex("+e"), vec![Token::PlusFlag("e".to_string())]);
2900 assert_eq!(lex("+x"), vec![Token::PlusFlag("x".to_string())]);
2901 assert_eq!(lex("+ex"), vec![Token::PlusFlag("ex".to_string())]);
2902 }
2903
2904 #[test]
2905 fn set_with_plus_flag() {
2906 assert_eq!(
2907 lex("set +e"),
2908 vec![
2909 Token::Set,
2910 Token::PlusFlag("e".to_string()),
2911 ]
2912 );
2913 }
2914
2915 #[test]
2916 fn set_with_multiple_flags() {
2917 assert_eq!(
2918 lex("set -e -u"),
2919 vec![
2920 Token::Set,
2921 Token::ShortFlag("e".to_string()),
2922 Token::ShortFlag("u".to_string()),
2923 ]
2924 );
2925 }
2926
2927 #[test]
2928 fn flags_vs_negative_numbers_edge_cases() {
2929 assert_eq!(
2931 lex("-1 a"),
2932 vec![Token::Int(-1), Token::Ident("a".to_string())]
2933 );
2934 assert_eq!(lex("-l"), vec![Token::ShortFlag("l".to_string())]);
2936 assert_eq!(lex("-123"), vec![Token::Int(-123)]);
2938 }
2939
2940 #[test]
2941 fn single_dash_is_minus_alone() {
2942 let result = tokenize("-").expect("should lex");
2944 assert_eq!(result.len(), 1);
2945 assert!(matches!(result[0].token, Token::MinusAlone));
2946 }
2947
2948 #[test]
2949 fn plus_bare_for_date_format() {
2950 let result = tokenize("+%s").expect("should lex");
2952 assert_eq!(result.len(), 1);
2953 assert!(matches!(result[0].token, Token::PlusBare(ref s) if s == "+%s"));
2954
2955 let result = tokenize("+%Y-%m-%d").expect("should lex");
2957 assert_eq!(result.len(), 1);
2958 assert!(matches!(result[0].token, Token::PlusBare(ref s) if s == "+%Y-%m-%d"));
2959 }
2960
2961 #[test]
2962 fn plus_flag_still_works() {
2963 let result = tokenize("+e").expect("should lex");
2965 assert_eq!(result.len(), 1);
2966 assert!(matches!(result[0].token, Token::PlusFlag(ref s) if s == "e"));
2967 }
2968
2969 #[test]
2970 fn while_keyword_vs_while_loop() {
2971 assert_eq!(lex("while"), vec![Token::While]);
2973 assert_eq!(
2975 lex("while true"),
2976 vec![Token::While, Token::True]
2977 );
2978 }
2979
2980 #[test]
2981 fn control_flow_keywords() {
2982 assert_eq!(lex("break"), vec![Token::Break]);
2983 assert_eq!(lex("continue"), vec![Token::Continue]);
2984 assert_eq!(lex("return"), vec![Token::Return]);
2985 assert_eq!(lex("exit"), vec![Token::Exit]);
2986 }
2987
2988 #[test]
2989 fn control_flow_with_numbers() {
2990 assert_eq!(
2991 lex("break 2"),
2992 vec![Token::Break, Token::Int(2)]
2993 );
2994 assert_eq!(
2995 lex("continue 3"),
2996 vec![Token::Continue, Token::Int(3)]
2997 );
2998 assert_eq!(
2999 lex("exit 1"),
3000 vec![Token::Exit, Token::Int(1)]
3001 );
3002 }
3003
3004 #[test]
3009 fn heredoc_simple() {
3010 let source = "cat <<EOF\nhello\nworld\nEOF";
3011 let tokens = lex(source);
3012 assert_eq!(tokens, vec![
3014 Token::Ident("cat".to_string()),
3015 Token::HereDocStart,
3016 Token::HereDoc(HereDocData {
3017 content: "hello\nworld\n".to_string(),
3018 literal: false,
3019 strip_tabs: false,
3020 body_start_offset: 10,
3021 }),
3022 Token::Newline,
3023 ]);
3024 }
3025
3026 #[test]
3027 fn heredoc_empty() {
3028 let source = "cat <<EOF\nEOF";
3029 let tokens = lex(source);
3030 assert_eq!(tokens, vec![
3031 Token::Ident("cat".to_string()),
3032 Token::HereDocStart,
3033 Token::HereDoc(HereDocData {
3034 content: "".to_string(),
3035 literal: false,
3036 strip_tabs: false,
3037 body_start_offset: 10,
3038 }),
3039 Token::Newline,
3040 ]);
3041 }
3042
3043 #[test]
3044 fn heredoc_with_special_chars() {
3045 let source = "cat <<EOF\n$VAR and \"quoted\" 'single'\nEOF";
3046 let tokens = lex(source);
3047 assert_eq!(tokens, vec![
3048 Token::Ident("cat".to_string()),
3049 Token::HereDocStart,
3050 Token::HereDoc(HereDocData {
3051 content: "$VAR and \"quoted\" 'single'\n".to_string(),
3052 literal: false,
3053 strip_tabs: false,
3054 body_start_offset: 10,
3055 }),
3056 Token::Newline,
3057 ]);
3058 }
3059
3060 #[test]
3061 fn heredoc_multiline() {
3062 let source = "cat <<END\nline1\nline2\nline3\nEND";
3063 let tokens = lex(source);
3064 assert_eq!(tokens, vec![
3065 Token::Ident("cat".to_string()),
3066 Token::HereDocStart,
3067 Token::HereDoc(HereDocData {
3068 content: "line1\nline2\nline3\n".to_string(),
3069 literal: false,
3070 strip_tabs: false,
3071 body_start_offset: 10,
3072 }),
3073 Token::Newline,
3074 ]);
3075 }
3076
3077 #[test]
3078 fn heredoc_in_command() {
3079 let source = "cat <<EOF\nhello\nEOF\necho goodbye";
3080 let tokens = lex(source);
3081 assert_eq!(tokens, vec![
3082 Token::Ident("cat".to_string()),
3083 Token::HereDocStart,
3084 Token::HereDoc(HereDocData {
3085 content: "hello\n".to_string(),
3086 literal: false,
3087 strip_tabs: false,
3088 body_start_offset: 10,
3089 }),
3090 Token::Newline,
3091 Token::Ident("echo".to_string()),
3092 Token::Ident("goodbye".to_string()),
3093 ]);
3094 }
3095
3096 #[test]
3097 fn heredoc_strip_tabs() {
3098 let source = "cat <<-EOF\n\thello\n\tworld\n\tEOF";
3099 let tokens = lex(source);
3100 assert_eq!(tokens, vec![
3104 Token::Ident("cat".to_string()),
3105 Token::HereDocStart,
3106 Token::HereDoc(HereDocData {
3107 content: "\thello\n\tworld\n".to_string(),
3108 literal: false,
3109 strip_tabs: true,
3110 body_start_offset: 11,
3111 }),
3112 Token::Newline,
3113 ]);
3114 }
3115
3116 #[test]
3121 fn arithmetic_simple() {
3122 let source = "$((1 + 2))";
3123 let tokens = lex(source);
3124 assert_eq!(tokens, vec![Token::Arithmetic("1 + 2".to_string())]);
3125 }
3126
3127 #[test]
3128 fn arithmetic_in_assignment() {
3129 let source = "X=$((5 * 3))";
3130 let tokens = lex(source);
3131 assert_eq!(tokens, vec![
3132 Token::Ident("X".to_string()),
3133 Token::Eq,
3134 Token::Arithmetic("5 * 3".to_string()),
3135 ]);
3136 }
3137
3138 #[test]
3139 fn arithmetic_with_nested_parens() {
3140 let source = "$((2 * (3 + 4)))";
3141 let tokens = lex(source);
3142 assert_eq!(tokens, vec![Token::Arithmetic("2 * (3 + 4)".to_string())]);
3143 }
3144
3145 #[test]
3146 fn arithmetic_with_variable() {
3147 let source = "$((X + 1))";
3148 let tokens = lex(source);
3149 assert_eq!(tokens, vec![Token::Arithmetic("X + 1".to_string())]);
3150 }
3151
3152 #[test]
3153 fn arithmetic_command_subst_not_confused() {
3154 let source = "$(echo hello)";
3156 let tokens = lex(source);
3157 assert_eq!(tokens, vec![
3158 Token::CmdSubstStart,
3159 Token::Ident("echo".to_string()),
3160 Token::Ident("hello".to_string()),
3161 Token::RParen,
3162 ]);
3163 }
3164
3165 #[test]
3166 fn arithmetic_nesting_limit() {
3167 let open_parens = "(".repeat(300);
3169 let close_parens = ")".repeat(300);
3170 let source = format!("$(({}1{}))", open_parens, close_parens);
3171 let result = tokenize(&source);
3172 assert!(result.is_err());
3173 let errors = result.unwrap_err();
3174 assert_eq!(errors.len(), 1);
3175 assert_eq!(errors[0].token, LexerError::NestingTooDeep);
3176 }
3177
3178 #[test]
3179 fn arithmetic_nesting_within_limit() {
3180 let source = "$((((1 + 2) * 3)))";
3182 let tokens = lex(source);
3183 assert_eq!(tokens, vec![Token::Arithmetic("((1 + 2) * 3)".to_string())]);
3184 }
3185
3186 #[test]
3198 fn arithmetic_after_apostrophe_in_comment() {
3199 let source = "# this doesn't work\necho $((1+2))";
3202 let tokens = lex(source);
3203 assert_eq!(tokens, vec![
3204 Token::Newline,
3205 Token::Ident("echo".to_string()),
3206 Token::Arithmetic("1+2".to_string()),
3207 ]);
3208 }
3209
3210 #[test]
3211 fn arithmetic_inside_comment_is_not_expanded() {
3212 let source = "# the $((y)) syntax explained\necho hello";
3214 let tokens = lex(source);
3215 assert_eq!(tokens, vec![
3216 Token::Newline,
3217 Token::Ident("echo".to_string()),
3218 Token::Ident("hello".to_string()),
3219 ]);
3220 }
3221
3222 #[test]
3223 fn backticked_arithmetic_in_comment_is_not_expanded() {
3224 let source = "# the `$((x))` syntax explained\necho $((3+4))";
3228 let tokens = lex(source);
3229 assert_eq!(tokens, vec![
3230 Token::Newline,
3231 Token::Ident("echo".to_string()),
3232 Token::Arithmetic("3+4".to_string()),
3233 ]);
3234 }
3235
3236 #[test]
3237 fn arithmetic_still_works_outside_comments() {
3238 let source = "X=$((1+2)); Y=$((3*4))";
3241 let tokens = lex(source);
3242 assert_eq!(tokens, vec![
3243 Token::Ident("X".to_string()),
3244 Token::Eq,
3245 Token::Arithmetic("1+2".to_string()),
3246 Token::Semi,
3247 Token::Ident("Y".to_string()),
3248 Token::Eq,
3249 Token::Arithmetic("3*4".to_string()),
3250 ]);
3251 }
3252
3253 #[test]
3254 fn arithmetic_inside_double_quotes_still_expands() {
3255 let source = "echo \"# $((1+2))\"";
3258 let tokens = lex(source);
3259 assert_eq!(tokens.len(), 2);
3264 assert!(matches!(tokens[0], Token::Ident(_)));
3265 assert!(matches!(tokens[1], Token::String(_)));
3266 }
3267
3268 #[test]
3281 fn backtick_in_source_is_rejected() {
3282 let result = tokenize("echo `date`");
3283 assert!(result.is_err());
3284 let errors = result.unwrap_err();
3285 assert!(errors.iter().any(|e| e.token == LexerError::BackticksNotSupported));
3286 }
3287
3288 #[test]
3289 fn backtick_in_comment_is_just_comment_text() {
3290 let source = "# use `date` here\necho hi";
3293 let tokens = lex(source);
3294 assert_eq!(tokens, vec![
3295 Token::Newline,
3296 Token::Ident("echo".to_string()),
3297 Token::Ident("hi".to_string()),
3298 ]);
3299 }
3300
3301 #[test]
3302 fn backtick_in_single_quoted_string_is_literal() {
3303 let source = "echo '`date`'";
3306 let tokens = lex(source);
3307 assert_eq!(tokens, vec![
3308 Token::Ident("echo".to_string()),
3309 Token::SingleString("`date`".to_string()),
3310 ]);
3311 }
3312
3313 #[test]
3314 fn backtick_in_double_quoted_string_is_literal() {
3315 let source = "echo \"`date`\"";
3320 let tokens = lex(source);
3321 assert_eq!(tokens.len(), 2);
3322 assert!(matches!(tokens[0], Token::Ident(_)));
3323 match &tokens[1] {
3324 Token::String(s) => assert!(s.contains('`')),
3325 other => panic!("expected Token::String, got {:?}", other),
3326 }
3327 }
3328
3329 #[test]
3330 fn backtick_in_heredoc_body_is_preserved() {
3331 let source = "cat <<EOF\n`date`\nEOF\n";
3334 let tokens = lex(source);
3335 let heredoc = tokens.iter().find(|t| matches!(t, Token::HereDoc(_)));
3336 assert!(heredoc.is_some(), "expected a HereDoc token");
3337 if let Some(Token::HereDoc(d)) = heredoc {
3338 assert!(d.content.contains('`'));
3339 }
3340 }
3341
3342 #[test]
3347 fn token_categories() {
3348 assert_eq!(Token::If.category(), TokenCategory::Keyword);
3350 assert_eq!(Token::Then.category(), TokenCategory::Keyword);
3351 assert_eq!(Token::For.category(), TokenCategory::Keyword);
3352 assert_eq!(Token::Function.category(), TokenCategory::Keyword);
3353 assert_eq!(Token::True.category(), TokenCategory::Keyword);
3354 assert_eq!(Token::TypeString.category(), TokenCategory::Keyword);
3355
3356 assert_eq!(Token::Pipe.category(), TokenCategory::Operator);
3358 assert_eq!(Token::And.category(), TokenCategory::Operator);
3359 assert_eq!(Token::Or.category(), TokenCategory::Operator);
3360 assert_eq!(Token::StderrToStdout.category(), TokenCategory::Operator);
3361 assert_eq!(Token::GtGt.category(), TokenCategory::Operator);
3362
3363 assert_eq!(Token::String("test".to_string()).category(), TokenCategory::String);
3365 assert_eq!(Token::SingleString("test".to_string()).category(), TokenCategory::String);
3366 assert_eq!(
3367 Token::HereDoc(HereDocData {
3368 content: "test".to_string(),
3369 literal: false,
3370 strip_tabs: false,
3371 body_start_offset: 0,
3372 }).category(),
3373 TokenCategory::String,
3374 );
3375
3376 assert_eq!(Token::Int(42).category(), TokenCategory::Number);
3378 assert_eq!(Token::Float(3.14).category(), TokenCategory::Number);
3379 assert_eq!(Token::Arithmetic("1+2".to_string()).category(), TokenCategory::Number);
3380
3381 assert_eq!(Token::SimpleVarRef("X".to_string()).category(), TokenCategory::Variable);
3383 assert_eq!(Token::VarRef("${X}".to_string()).category(), TokenCategory::Variable);
3384 assert_eq!(Token::Positional(1).category(), TokenCategory::Variable);
3385 assert_eq!(Token::AllArgs.category(), TokenCategory::Variable);
3386 assert_eq!(Token::ArgCount.category(), TokenCategory::Variable);
3387 assert_eq!(Token::LastExitCode.category(), TokenCategory::Variable);
3388 assert_eq!(Token::CurrentPid.category(), TokenCategory::Variable);
3389
3390 assert_eq!(Token::ShortFlag("l".to_string()).category(), TokenCategory::Flag);
3392 assert_eq!(Token::LongFlag("verbose".to_string()).category(), TokenCategory::Flag);
3393 assert_eq!(Token::PlusFlag("e".to_string()).category(), TokenCategory::Flag);
3394 assert_eq!(Token::DoubleDash.category(), TokenCategory::Flag);
3395
3396 assert_eq!(Token::Semi.category(), TokenCategory::Punctuation);
3398 assert_eq!(Token::LParen.category(), TokenCategory::Punctuation);
3399 assert_eq!(Token::LBracket.category(), TokenCategory::Punctuation);
3400 assert_eq!(Token::Newline.category(), TokenCategory::Punctuation);
3401
3402 assert_eq!(Token::Comment.category(), TokenCategory::Comment);
3404
3405 assert_eq!(Token::Path("/tmp/file".to_string()).category(), TokenCategory::Path);
3407
3408 assert_eq!(Token::Ident("echo".to_string()).category(), TokenCategory::Command);
3410 assert_eq!(Token::NumberIdent("019dda1c".to_string()).category(), TokenCategory::Command);
3411 assert_eq!(Token::DottedIdent(".gitignore".to_string()).category(), TokenCategory::Command);
3412
3413 assert_eq!(Token::InvalidFloatNoLeading.category(), TokenCategory::Error);
3415 assert_eq!(Token::InvalidFloatNoTrailing.category(), TokenCategory::Error);
3416 }
3417
3418 #[test]
3419 fn test_heredoc_piped_to_command() {
3420 let tokens = tokenize("cat <<EOF | jq\n{\"key\": \"val\"}\nEOF").unwrap();
3423 let heredoc_pos = tokens.iter().position(|t| matches!(t.token, Token::HereDoc(_)));
3424 let pipe_pos = tokens.iter().position(|t| matches!(t.token, Token::Pipe));
3425 assert!(heredoc_pos.is_some(), "should have a heredoc token");
3426 assert!(pipe_pos.is_some(), "should have a pipe token");
3427 assert!(
3428 pipe_pos.unwrap() > heredoc_pos.unwrap(),
3429 "Pipe must come after heredoc, got heredoc at {}, pipe at {}. Tokens: {:?}",
3430 heredoc_pos.unwrap(), pipe_pos.unwrap(), tokens,
3431 );
3432 }
3433
3434 #[test]
3435 fn test_heredoc_standalone_still_works() {
3436 let tokens = tokenize("cat <<EOF\nhello\nEOF").unwrap();
3438 assert!(tokens.iter().any(|t| matches!(t.token, Token::HereDoc(_))));
3439 assert!(!tokens.iter().any(|t| matches!(t.token, Token::Pipe)));
3440 }
3441
3442 #[test]
3443 fn test_heredoc_preserves_leading_empty_lines() {
3444 let tokens = tokenize("cat <<EOF\n\nhello\nEOF").unwrap();
3446 let heredoc = tokens.iter().find_map(|t| {
3447 if let Token::HereDoc(data) = &t.token {
3448 Some(data.clone())
3449 } else {
3450 None
3451 }
3452 });
3453 assert!(heredoc.is_some(), "should have a heredoc token");
3454 let data = heredoc.unwrap();
3455 assert!(data.content.starts_with('\n'), "leading empty line must be preserved, got: {:?}", data.content);
3456 assert_eq!(data.content, "\nhello\n");
3457 }
3458
3459 #[test]
3460 fn test_heredoc_quoted_delimiter_sets_literal() {
3461 let tokens = tokenize("cat <<'EOF'\nhello $HOME\nEOF").unwrap();
3463 let heredoc = tokens.iter().find_map(|t| {
3464 if let Token::HereDoc(data) = &t.token {
3465 Some(data.clone())
3466 } else {
3467 None
3468 }
3469 });
3470 assert!(heredoc.is_some(), "should have a heredoc token");
3471 let data = heredoc.unwrap();
3472 assert!(data.literal, "quoted delimiter should set literal=true");
3473 assert_eq!(data.content, "hello $HOME\n");
3474 }
3475
3476 #[test]
3477 fn test_heredoc_unquoted_delimiter_not_literal() {
3478 let tokens = tokenize("cat <<EOF\nhello $HOME\nEOF").unwrap();
3480 let heredoc = tokens.iter().find_map(|t| {
3481 if let Token::HereDoc(data) = &t.token {
3482 Some(data.clone())
3483 } else {
3484 None
3485 }
3486 });
3487 assert!(heredoc.is_some(), "should have a heredoc token");
3488 let data = heredoc.unwrap();
3489 assert!(!data.literal, "unquoted delimiter should have literal=false");
3490 }
3491
3492 #[test]
3497 fn colon_double_in_word() {
3498 assert_eq!(lex("foo::bar"), vec![Token::Ident("foo::bar".into())]);
3499 }
3500
3501 #[test]
3502 fn colon_single_in_word() {
3503 assert_eq!(lex("a:b:c"), vec![Token::Ident("a:b:c".into())]);
3504 }
3505
3506 #[test]
3507 fn colon_with_port() {
3508 assert_eq!(lex("host:8080"), vec![Token::Ident("host:8080".into())]);
3509 }
3510
3511 #[test]
3512 fn colon_standalone() {
3513 assert_eq!(lex(":"), vec![Token::Colon]);
3514 }
3515
3516 #[test]
3517 fn colon_spaced_no_merge() {
3518 assert_eq!(
3519 lex("foo : bar"),
3520 vec![
3521 Token::Ident("foo".into()),
3522 Token::Colon,
3523 Token::Ident("bar".into()),
3524 ]
3525 );
3526 }
3527
3528 #[test]
3529 fn colon_in_command_arg() {
3530 assert_eq!(
3531 lex("echo foo::bar"),
3532 vec![
3533 Token::Ident("echo".into()),
3534 Token::Ident("foo::bar".into()),
3535 ]
3536 );
3537 }
3538
3539 #[test]
3540 fn colon_trailing() {
3541 assert_eq!(lex("foo:"), vec![Token::Ident("foo:".into())]);
3543 }
3544
3545 #[test]
3546 fn colon_leading() {
3547 assert_eq!(lex(":foo"), vec![Token::Ident(":foo".into())]);
3549 }
3550
3551 #[test]
3552 fn colon_with_path() {
3553 assert_eq!(
3555 lex("/usr/bin:8080"),
3556 vec![Token::Ident("/usr/bin:8080".into())]
3557 );
3558 }
3559
3560 #[test]
3565 fn is_keyword_covers_control_flow() {
3566 for t in [
3567 Token::While,
3568 Token::Return,
3569 Token::Break,
3570 Token::Continue,
3571 Token::Exit,
3572 ] {
3573 assert!(t.is_keyword(), "{t:?} should be a keyword");
3574 }
3575 }
3576
3577 #[test]
3578 fn starts_statement_covers_while() {
3579 assert!(Token::While.starts_statement());
3580 }
3581
3582 #[test]
3583 fn is_keyword_rejects_operators() {
3584 for t in [Token::Pipe, Token::Amp, Token::Eq, Token::LBrace] {
3585 assert!(!t.is_keyword(), "{t:?} should not be a keyword");
3586 }
3587 }
3588}