1use logos::{Logos, Span};
17use std::fmt;
18use std::sync::atomic::{AtomicU64, Ordering};
19use std::time::{SystemTime, UNIX_EPOCH};
20
/// Process-wide sequence number mixed into every id produced by
/// `unique_marker_id`; it is bumped once per generated id.
static MARKER_COUNTER: AtomicU64 = AtomicU64::new(0);

/// Maximum number of nested `(` allowed inside a `$(( ... ))` expression.
/// Going past it makes `preprocess_arithmetic` fail with
/// `LexerError::NestingTooDeep`.
const MAX_PAREN_DEPTH: usize = 256;
27
/// Records one `$(( ... ))` expression that was swapped for a marker, so
/// spans measured in the preprocessed text can be mapped back to the source.
#[derive(Debug, Clone)]
struct SpanReplacement {
    /// Byte offset in the preprocessed text where the marker begins.
    preprocessed_pos: usize,
    /// Byte length of the marker that was inserted.
    marker_len: usize,
    /// Byte length of the original source text the marker stands for.
    original_len: usize,
}
40
41fn correct_span(span: Span, replacements: &[SpanReplacement]) -> Span {
43 let mut start_adjustment: isize = 0;
44 let mut end_adjustment: isize = 0;
45
46 for r in replacements {
47 let delta = r.original_len as isize - r.marker_len as isize;
49
50 if span.start > r.preprocessed_pos + r.marker_len {
52 start_adjustment += delta;
53 } else if span.start > r.preprocessed_pos {
54 start_adjustment += delta;
57 }
58
59 if span.end > r.preprocessed_pos + r.marker_len {
61 end_adjustment += delta;
62 } else if span.end > r.preprocessed_pos {
63 end_adjustment += delta;
65 }
66 }
67
68 let new_start = (span.start as isize + start_adjustment).max(0) as usize;
69 let new_end = (span.end as isize + end_adjustment).max(new_start as isize) as usize;
70 new_start..new_end
71}
72
73fn unique_marker_id() -> String {
76 let timestamp = SystemTime::now()
77 .duration_since(UNIX_EPOCH)
78 .map(|d| d.as_nanos())
79 .unwrap_or(0);
80 let counter = MARKER_COUNTER.fetch_add(1, Ordering::Relaxed);
81 #[cfg(target_os = "wasi")]
82 let pid = 0u32;
83 #[cfg(not(target_os = "wasi"))]
84 let pid = std::process::id();
85 format!("{:x}_{:x}_{:x}", timestamp, counter, pid)
86}
87
/// A value paired with the source span it was produced from.
#[derive(Debug, Clone, PartialEq)]
pub struct Spanned<T> {
    /// The wrapped value (a token or an error in this file).
    pub token: T,
    /// Byte range associated with `token`.
    pub span: Span,
}
94
95impl<T> Spanned<T> {
96 pub fn new(token: T, span: Span) -> Self {
97 Self { token, span }
98 }
99}
100
/// Errors reported while preprocessing or tokenizing source text.
#[derive(Debug, Clone, PartialEq, Default)]
pub enum LexerError {
    /// Default error, used when no more specific variant applies.
    #[default]
    UnexpectedCharacter,
    /// Displayed as "unterminated string".
    UnterminatedString,
    /// Displayed as "unterminated variable reference".
    UnterminatedVarRef,
    /// Displayed as "invalid escape sequence".
    InvalidEscape,
    /// An integer or float literal failed to parse.
    InvalidNumber,
    /// `true`/`false` written with non-lowercase letters; carries the text as written.
    AmbiguousBoolean(String),
    /// `yes`/`no` in any letter case; carries the text as written.
    AmbiguousBooleanLike(String),
    /// A float written without a leading digit (e.g. `.5`).
    InvalidFloatNoLeading,
    /// A float written without a trailing digit (e.g. `5.`).
    InvalidFloatNoTrailing,
    /// Parenthesis nesting inside `$(( ... ))` exceeded `MAX_PAREN_DEPTH`.
    NestingTooDeep,
    /// A heredoc body reached end of input before its closing delimiter line.
    UnterminatedHeredoc { delimiter: String },
    /// A backtick was found; the message points users at `$(cmd)` instead.
    BackticksNotSupported,
}
126
127impl fmt::Display for LexerError {
128 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129 match self {
130 LexerError::UnexpectedCharacter => write!(f, "unexpected character"),
131 LexerError::UnterminatedString => write!(f, "unterminated string"),
132 LexerError::UnterminatedVarRef => write!(f, "unterminated variable reference"),
133 LexerError::InvalidEscape => write!(f, "invalid escape sequence"),
134 LexerError::InvalidNumber => write!(f, "invalid number"),
135 LexerError::AmbiguousBoolean(s) => {
136 write!(f, "ambiguous boolean, use lowercase '{}'", s.to_lowercase())
137 }
138 LexerError::AmbiguousBooleanLike(s) => {
139 let suggest = if s.eq_ignore_ascii_case("yes") { "true" } else { "false" };
140 write!(f, "ambiguous boolean-like '{}', use '{}' or '\"{}\"'", s, suggest, s)
141 }
142 LexerError::InvalidFloatNoLeading => write!(f, "float must have leading digit"),
143 LexerError::InvalidFloatNoTrailing => write!(f, "float must have trailing digit"),
144 LexerError::NestingTooDeep => write!(f, "nesting depth exceeded (max {})", MAX_PAREN_DEPTH),
145 LexerError::UnterminatedHeredoc { delimiter } => {
146 write!(f, "unterminated heredoc, expected closing delimiter `{}` on its own line", delimiter)
147 }
148 LexerError::BackticksNotSupported => {
149 write!(f, "backticks are not supported in kaish; use $(cmd) instead")
150 }
151 }
152 }
153}
154
/// Payload of a `Token::HereDoc`.
// NOTE(review): the fields mirror `HeredocReplacement` one-to-one; the code
// that copies them across is not in this part of the file — confirm.
#[derive(Debug, Clone, PartialEq)]
pub struct HereDocData {
    /// Body text of the heredoc.
    pub content: String,
    /// Presumably true when the delimiter was quoted — TODO confirm against the constructor.
    pub literal: bool,
    /// Presumably true for the `<<-` form — TODO confirm against the constructor.
    pub strip_tabs: bool,
    /// Presumably the byte offset in the source where the body begins — TODO confirm.
    pub body_start_offset: usize,
}
184
/// Tokens recognised by the lexer.
///
/// Variants carrying a `#[token]`/`#[regex]` attribute are matched by the
/// logos-generated lexer; spaces and tabs between tokens are skipped.
/// Variants without an attribute (`GlobWord`, `Arithmetic`, `HereDoc`) are
/// never produced by the generated matcher itself.
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(error = LexerError)]
#[logos(skip r"[ \t]+")]
pub enum Token {
    // ---- keywords ----
    #[token("set")]
    Set,

    #[token("local")]
    Local,

    #[token("if")]
    If,

    #[token("then")]
    Then,

    #[token("else")]
    Else,

    #[token("elif")]
    Elif,

    #[token("fi")]
    Fi,

    #[token("for")]
    For,

    #[token("while")]
    While,

    #[token("in")]
    In,

    #[token("do")]
    Do,

    #[token("done")]
    Done,

    #[token("case")]
    Case,

    #[token("esac")]
    Esac,

    #[token("function")]
    Function,

    #[token("break")]
    Break,

    #[token("continue")]
    Continue,

    #[token("return")]
    Return,

    #[token("exit")]
    Exit,

    #[token("true")]
    True,

    #[token("false")]
    False,

    // ---- type names ----
    #[token("string")]
    TypeString,

    #[token("int")]
    TypeInt,

    #[token("float")]
    TypeFloat,

    #[token("bool")]
    TypeBool,

    // ---- multi-character operators ----
    #[token("&&")]
    And,

    #[token("||")]
    Or,

    #[token("==")]
    EqEq,

    #[token("!=")]
    NotEq,

    #[token("=~")]
    Match,

    #[token("!~")]
    NotMatch,

    #[token(">=")]
    GtEq,

    #[token("<=")]
    LtEq,

    #[token(">>")]
    GtGt,

    #[token("2>&1")]
    StderrToStdout,

    #[token("1>&2")]
    StdoutToStderr,

    /// Shorthand spelling `>&2`.
    #[token(">&2")]
    StdoutToStderr2,

    #[token("2>")]
    Stderr,

    #[token("&>")]
    Both,

    #[token("<<<")]
    HereString,

    #[token("<<")]
    HereDocStart,

    #[token(";;")]
    DoubleSemi,

    // ---- single-character operators and punctuation ----
    #[token("=")]
    Eq,

    #[token("|")]
    Pipe,

    #[token("&")]
    Amp,

    #[token(">")]
    Gt,

    #[token("<")]
    Lt,

    #[token(";")]
    Semi,

    #[token(":")]
    Colon,

    #[token(",")]
    Comma,

    #[token("..")]
    DotDot,

    #[token(".")]
    Dot,

    // ---- paths ----
    /// `~` immediately followed by path characters; payload is the full slice.
    #[regex(r"~[a-zA-Z0-9_./+-]+", lex_tilde_path, priority = 3)]
    TildePath(String),

    /// A lone `~`.
    #[token("~")]
    Tilde,

    /// Either `../...` or `name/...`; payload is the full slice.
    #[regex(r"\.\./[a-zA-Z0-9_./-]+", lex_relative_path, priority = 3)]
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_.-]*/[a-zA-Z0-9_./-]*", lex_relative_path, priority = 3)]
    RelativePath(String),

    /// `./...`; payload is the full slice.
    #[regex(r"\./[a-zA-Z0-9_./-]+", lex_dot_slash_path, priority = 3)]
    DotSlashPath(String),

    /// A word starting with `.` followed by a letter or `_`; payload keeps the dot.
    #[regex(r"\.[a-zA-Z_][a-zA-Z0-9_.-]*", lex_dotted_ident, priority = 3)]
    DottedIdent(String),

    // ---- brackets and glob punctuation ----
    #[token("{")]
    LBrace,

    #[token("}")]
    RBrace,

    #[token("[")]
    LBracket,

    #[token("]")]
    RBracket,

    #[token("(")]
    LParen,

    #[token(")")]
    RParen,

    #[token("*")]
    Star,

    #[token("!")]
    Bang,

    #[token("?")]
    Question,

    /// Has no lexer pattern of its own.
    // NOTE(review): presumably built by `merge_glob_adjacent` — confirm.
    GlobWord(String),

    /// Has no lexer pattern of its own.
    // NOTE(review): presumably substituted for the markers emitted by
    // `preprocess_arithmetic` — confirm against the tokenizer entry point.
    Arithmetic(String),

    /// Opening `$(` of a command substitution.
    #[token("$(")]
    CmdSubstStart,

    // ---- flags and bare +/- words ----
    /// `--name`; payload omits the leading `--`.
    #[regex(r"--[a-zA-Z][a-zA-Z0-9-]*", lex_long_flag, priority = 3)]
    LongFlag(String),

    /// `-name`; payload omits the leading `-`.
    #[regex(r"-[a-zA-Z][a-zA-Z0-9-]*", lex_short_flag, priority = 3)]
    ShortFlag(String),

    /// `+name`; payload omits the leading `+`.
    #[regex(r"\+[a-zA-Z][a-zA-Z0-9]*", lex_plus_flag, priority = 3)]
    PlusFlag(String),

    /// A bare `--`.
    #[token("--")]
    DoubleDash,

    /// `+` followed by a non-letter, non-space character and the rest of the
    /// word; payload is the full slice.
    #[regex(r"\+[^a-zA-Z\s][^\s]*", lex_plus_bare, priority = 2)]
    PlusBare(String),

    /// `-` followed by a character that is not alphanumeric, whitespace or
    /// `-`, plus the rest of the word; payload is the full slice.
    #[regex(r"-[^a-zA-Z0-9\s\-][^\s]*", lex_minus_bare, priority = 1)]
    MinusBare(String),

    /// `%` followed by digits; payload is the full slice.
    #[regex(r"%[0-9]+", lex_job_spec)]
    JobSpec(String),

    /// A lone `-`.
    #[token("-")]
    MinusAlone,

    // ---- strings and variables ----
    /// Double-quoted string; payload is whatever `parse_string_literal` returns.
    #[regex(r#""([^"\\]|\\.)*""#, lex_string)]
    String(String),

    /// Single-quoted string; payload is the text between the quotes.
    #[regex(r"'[^']*'", lex_single_string)]
    SingleString(String),

    /// `${...}`; payload is the full slice including `${` and `}`.
    #[regex(r"\$\{[^}]+\}", lex_varref)]
    VarRef(String),

    /// `$name`; payload omits the `$`.
    #[regex(r"\$[a-zA-Z_][a-zA-Z0-9_]*", lex_simple_varref)]
    SimpleVarRef(String),

    /// `$0` through `$9`; payload is the digit.
    #[regex(r"\$[0-9]", lex_positional)]
    Positional(usize),

    #[token("$@")]
    AllArgs,

    #[token("$#")]
    ArgCount,

    #[token("$?")]
    LastExitCode,

    #[token("$$")]
    CurrentPid,

    /// `${#name}`; payload is just `name`.
    #[regex(r"\$\{#[a-zA-Z_][a-zA-Z0-9_]*\}", lex_var_length)]
    VarLength(String),

    /// Has no lexer pattern of its own.
    // NOTE(review): presumably substituted for the markers emitted by
    // `preprocess_heredocs` — confirm against the tokenizer entry point.
    HereDoc(HereDocData),

    // ---- numbers ----
    /// Optionally negative integer; out-of-range values yield `LexerError::InvalidNumber`.
    #[regex(r"-?[0-9]+", lex_int, priority = 2)]
    Int(i64),

    /// Optionally negative float with digits on both sides of the dot.
    #[regex(r"-?[0-9]+\.[0-9]+", lex_float)]
    Float(f64),

    /// Digits followed by identifier characters; payload is the full slice.
    #[regex(r"[0-9]+[a-zA-Z_][a-zA-Z0-9_.-]*", lex_number_ident, priority = 3)]
    NumberIdent(String),

    /// `.5`-style input. The callback always returns
    /// `LexerError::InvalidFloatNoLeading`, so this token is never yielded.
    #[regex(r"\.[0-9]+", lex_invalid_float_no_leading, priority = 3)]
    InvalidFloatNoLeading,

    /// `5.`-style input. The callback always returns
    /// `LexerError::InvalidFloatNoTrailing`, so this token is never yielded.
    #[regex(r"[0-9]+\.", lex_invalid_float_no_trailing, priority = 2)]
    InvalidFloatNoTrailing,

    // ---- words ----
    /// `/` followed by zero or more path characters; payload is the full slice.
    #[regex(r"/[a-zA-Z0-9_./+-]*", lex_path)]
    Path(String),

    /// Identifier; `lex_ident` rejects ambiguous boolean spellings.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_.-]*", lex_ident)]
    Ident(String),

    // ---- trivia ----
    /// `#` through the end of the line (line terminator excluded).
    #[regex(r"#[^\n\r]*", allow_greedy = true)]
    Comment,

    /// `\n` or `\r\n`.
    #[regex(r"\n|\r\n")]
    Newline,

    /// Backslash, optional spaces/tabs, then a line terminator.
    #[regex(r"\\[ \t]*(\n|\r\n)")]
    LineContinuation,

    /// A backtick. The callback always returns
    /// `LexerError::BackticksNotSupported`, so this token is never yielded.
    #[token("`", reject_backtick)]
    BacktickRejected,
}
597
/// Coarse classification of a token, as returned by `Token::category`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenCategory {
    /// Reserved words, boolean literals and type names.
    Keyword,
    /// Logical, comparison, pipe and redirection operators.
    Operator,
    /// Quoted strings and heredocs.
    String,
    /// Integers, floats and arithmetic expressions.
    Number,
    /// Variable references and special parameters.
    Variable,
    /// `#` comments.
    Comment,
    /// Separators, brackets, newlines and similar.
    Punctuation,
    /// Identifiers and other bare words.
    Command,
    /// Paths, tildes and glob words.
    Path,
    /// `--long`, `-short`, `+plus` flags and `--`.
    Flag,
    /// Tokens that represent rejected input.
    Error,
}
627
628impl Token {
629 pub fn category(&self) -> TokenCategory {
631 match self {
632 Token::If
634 | Token::Then
635 | Token::Else
636 | Token::Elif
637 | Token::Fi
638 | Token::For
639 | Token::In
640 | Token::Do
641 | Token::Done
642 | Token::While
643 | Token::Case
644 | Token::Esac
645 | Token::Function
646 | Token::Return
647 | Token::Break
648 | Token::Continue
649 | Token::Exit
650 | Token::Set
651 | Token::Local
652 | Token::True
653 | Token::False
654 | Token::TypeString
655 | Token::TypeInt
656 | Token::TypeFloat
657 | Token::TypeBool => TokenCategory::Keyword,
658
659 Token::Pipe
661 | Token::And
662 | Token::Or
663 | Token::Amp
664 | Token::Eq
665 | Token::EqEq
666 | Token::NotEq
667 | Token::Match
668 | Token::NotMatch
669 | Token::Lt
670 | Token::Gt
671 | Token::LtEq
672 | Token::GtEq
673 | Token::GtGt
674 | Token::Stderr
675 | Token::Both
676 | Token::HereDocStart
677 | Token::HereString
678 | Token::StderrToStdout
679 | Token::StdoutToStderr
680 | Token::StdoutToStderr2 => TokenCategory::Operator,
681
682 Token::String(_) | Token::SingleString(_) | Token::HereDoc(_) => TokenCategory::String,
684
685 Token::Int(_) | Token::Float(_) | Token::Arithmetic(_) => TokenCategory::Number,
687
688 Token::VarRef(_)
690 | Token::SimpleVarRef(_)
691 | Token::Positional(_)
692 | Token::AllArgs
693 | Token::ArgCount
694 | Token::VarLength(_)
695 | Token::LastExitCode
696 | Token::CurrentPid => TokenCategory::Variable,
697
698 Token::LongFlag(_)
700 | Token::ShortFlag(_)
701 | Token::PlusFlag(_)
702 | Token::DoubleDash => TokenCategory::Flag,
703
704 Token::Semi
706 | Token::DoubleSemi
707 | Token::Colon
708 | Token::Comma
709 | Token::Dot
710 | Token::LParen
711 | Token::RParen
712 | Token::LBrace
713 | Token::RBrace
714 | Token::LBracket
715 | Token::RBracket
716 | Token::Bang
717 | Token::Question
718 | Token::Star
719 | Token::Newline
720 | Token::LineContinuation
721 | Token::CmdSubstStart => TokenCategory::Punctuation,
722
723 Token::GlobWord(_) => TokenCategory::Path,
725
726 Token::Comment => TokenCategory::Comment,
728
729 Token::Path(_)
731 | Token::TildePath(_)
732 | Token::RelativePath(_)
733 | Token::Tilde
734 | Token::DotDot
735 | Token::DotSlashPath(_) => TokenCategory::Path,
736
737 Token::Ident(_)
739 | Token::PlusBare(_)
740 | Token::MinusBare(_)
741 | Token::MinusAlone
742 | Token::NumberIdent(_)
743 | Token::DottedIdent(_)
744 | Token::JobSpec(_) => TokenCategory::Command,
745
746 Token::InvalidFloatNoLeading
748 | Token::InvalidFloatNoTrailing
749 | Token::BacktickRejected => TokenCategory::Error,
750 }
751 }
752}
753
754fn lex_string(lex: &mut logos::Lexer<Token>) -> Result<String, LexerError> {
756 parse_string_literal(lex.slice())
757}
758
759fn lex_single_string(lex: &mut logos::Lexer<Token>) -> String {
761 let s = lex.slice();
762 s[1..s.len() - 1].to_string()
764}
765
766fn lex_varref(lex: &mut logos::Lexer<Token>) -> String {
768 lex.slice().to_string()
770}
771
772fn lex_simple_varref(lex: &mut logos::Lexer<Token>) -> String {
774 lex.slice()[1..].to_string()
776}
777
778fn lex_positional(lex: &mut logos::Lexer<Token>) -> usize {
780 lex.slice()[1..].parse().unwrap_or(0)
782}
783
784fn lex_var_length(lex: &mut logos::Lexer<Token>) -> String {
786 let s = lex.slice();
788 s[3..s.len() - 1].to_string()
789}
790
791fn lex_int(lex: &mut logos::Lexer<Token>) -> Result<i64, LexerError> {
793 lex.slice().parse().map_err(|_| LexerError::InvalidNumber)
794}
795
796fn lex_float(lex: &mut logos::Lexer<Token>) -> Result<f64, LexerError> {
798 lex.slice().parse().map_err(|_| LexerError::InvalidNumber)
799}
800
801fn lex_number_ident(lex: &mut logos::Lexer<Token>) -> String {
805 lex.slice().to_string()
806}
807
808fn lex_dotted_ident(lex: &mut logos::Lexer<Token>) -> String {
810 lex.slice().to_string()
811}
812
813fn lex_invalid_float_no_leading(_lex: &mut logos::Lexer<Token>) -> Result<(), LexerError> {
816 Err(LexerError::InvalidFloatNoLeading)
817}
818
819fn reject_backtick(_lex: &mut logos::Lexer<Token>) -> Result<(), LexerError> {
823 Err(LexerError::BackticksNotSupported)
824}
825
826fn lex_invalid_float_no_trailing(_lex: &mut logos::Lexer<Token>) -> Result<(), LexerError> {
829 Err(LexerError::InvalidFloatNoTrailing)
830}
831
832fn lex_ident(lex: &mut logos::Lexer<Token>) -> Result<String, LexerError> {
834 let s = lex.slice();
835
836 match s.to_lowercase().as_str() {
839 "true" | "false" if s != "true" && s != "false" => {
840 return Err(LexerError::AmbiguousBoolean(s.to_string()));
841 }
842 _ => {}
843 }
844
845 if s.eq_ignore_ascii_case("yes") || s.eq_ignore_ascii_case("no") {
847 return Err(LexerError::AmbiguousBooleanLike(s.to_string()));
848 }
849
850 Ok(s.to_string())
851}
852
853fn lex_long_flag(lex: &mut logos::Lexer<Token>) -> String {
855 lex.slice()[2..].to_string()
857}
858
859fn lex_short_flag(lex: &mut logos::Lexer<Token>) -> String {
861 lex.slice()[1..].to_string()
863}
864
865fn lex_plus_flag(lex: &mut logos::Lexer<Token>) -> String {
867 lex.slice()[1..].to_string()
869}
870
871fn lex_plus_bare(lex: &mut logos::Lexer<Token>) -> String {
873 lex.slice().to_string()
874}
875
876fn lex_minus_bare(lex: &mut logos::Lexer<Token>) -> String {
878 lex.slice().to_string()
879}
880
881fn lex_job_spec(lex: &mut logos::Lexer<Token>) -> String {
883 lex.slice().to_string()
884}
885
886fn lex_path(lex: &mut logos::Lexer<Token>) -> String {
888 lex.slice().to_string()
889}
890
891fn lex_tilde_path(lex: &mut logos::Lexer<Token>) -> String {
893 lex.slice().to_string()
894}
895
896fn lex_relative_path(lex: &mut logos::Lexer<Token>) -> String {
898 lex.slice().to_string()
899}
900
901fn lex_dot_slash_path(lex: &mut logos::Lexer<Token>) -> String {
903 lex.slice().to_string()
904}
905
906impl fmt::Display for Token {
907 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
908 match self {
909 Token::Set => write!(f, "set"),
910 Token::Local => write!(f, "local"),
911 Token::If => write!(f, "if"),
912 Token::Then => write!(f, "then"),
913 Token::Else => write!(f, "else"),
914 Token::Elif => write!(f, "elif"),
915 Token::Fi => write!(f, "fi"),
916 Token::For => write!(f, "for"),
917 Token::While => write!(f, "while"),
918 Token::In => write!(f, "in"),
919 Token::Do => write!(f, "do"),
920 Token::Done => write!(f, "done"),
921 Token::Case => write!(f, "case"),
922 Token::Esac => write!(f, "esac"),
923 Token::Function => write!(f, "function"),
924 Token::Break => write!(f, "break"),
925 Token::Continue => write!(f, "continue"),
926 Token::Return => write!(f, "return"),
927 Token::Exit => write!(f, "exit"),
928 Token::True => write!(f, "true"),
929 Token::False => write!(f, "false"),
930 Token::TypeString => write!(f, "string"),
931 Token::TypeInt => write!(f, "int"),
932 Token::TypeFloat => write!(f, "float"),
933 Token::TypeBool => write!(f, "bool"),
934 Token::And => write!(f, "&&"),
935 Token::Or => write!(f, "||"),
936 Token::EqEq => write!(f, "=="),
937 Token::NotEq => write!(f, "!="),
938 Token::Match => write!(f, "=~"),
939 Token::NotMatch => write!(f, "!~"),
940 Token::GtEq => write!(f, ">="),
941 Token::LtEq => write!(f, "<="),
942 Token::GtGt => write!(f, ">>"),
943 Token::StderrToStdout => write!(f, "2>&1"),
944 Token::StdoutToStderr => write!(f, "1>&2"),
945 Token::StdoutToStderr2 => write!(f, ">&2"),
946 Token::Stderr => write!(f, "2>"),
947 Token::Both => write!(f, "&>"),
948 Token::HereDocStart => write!(f, "<<"),
949 Token::HereString => write!(f, "<<<"),
950 Token::DoubleSemi => write!(f, ";;"),
951 Token::Eq => write!(f, "="),
952 Token::Pipe => write!(f, "|"),
953 Token::Amp => write!(f, "&"),
954 Token::Gt => write!(f, ">"),
955 Token::Lt => write!(f, "<"),
956 Token::Semi => write!(f, ";"),
957 Token::Colon => write!(f, ":"),
958 Token::Comma => write!(f, ","),
959 Token::Dot => write!(f, "."),
960 Token::DotDot => write!(f, ".."),
961 Token::Tilde => write!(f, "~"),
962 Token::TildePath(s) => write!(f, "{}", s),
963 Token::RelativePath(s) => write!(f, "{}", s),
964 Token::DotSlashPath(s) => write!(f, "{}", s),
965 Token::LBrace => write!(f, "{{"),
966 Token::RBrace => write!(f, "}}"),
967 Token::LBracket => write!(f, "["),
968 Token::RBracket => write!(f, "]"),
969 Token::LParen => write!(f, "("),
970 Token::RParen => write!(f, ")"),
971 Token::Star => write!(f, "*"),
972 Token::Bang => write!(f, "!"),
973 Token::Question => write!(f, "?"),
974 Token::GlobWord(s) => write!(f, "GLOB({})", s),
975 Token::Arithmetic(s) => write!(f, "ARITHMETIC({})", s),
976 Token::CmdSubstStart => write!(f, "$("),
977 Token::LongFlag(s) => write!(f, "--{}", s),
978 Token::ShortFlag(s) => write!(f, "-{}", s),
979 Token::PlusFlag(s) => write!(f, "+{}", s),
980 Token::DoubleDash => write!(f, "--"),
981 Token::PlusBare(s) => write!(f, "{}", s),
982 Token::MinusBare(s) => write!(f, "{}", s),
983 Token::JobSpec(s) => write!(f, "{}", s),
984 Token::MinusAlone => write!(f, "-"),
985 Token::String(s) => write!(f, "STRING({:?})", s),
986 Token::SingleString(s) => write!(f, "SINGLESTRING({:?})", s),
987 Token::HereDoc(d) => write!(f, "HEREDOC({:?}, literal={})", d.content, d.literal),
988 Token::VarRef(v) => write!(f, "VARREF({})", v),
989 Token::SimpleVarRef(v) => write!(f, "SIMPLEVARREF({})", v),
990 Token::Positional(n) => write!(f, "${}", n),
991 Token::AllArgs => write!(f, "$@"),
992 Token::ArgCount => write!(f, "$#"),
993 Token::LastExitCode => write!(f, "$?"),
994 Token::CurrentPid => write!(f, "$$"),
995 Token::VarLength(v) => write!(f, "${{#{}}}", v),
996 Token::Int(n) => write!(f, "INT({})", n),
997 Token::Float(n) => write!(f, "FLOAT({})", n),
998 Token::Path(s) => write!(f, "PATH({})", s),
999 Token::Ident(s) => write!(f, "IDENT({})", s),
1000 Token::NumberIdent(s) => write!(f, "NUMIDENT({})", s),
1001 Token::DottedIdent(s) => write!(f, "DOTIDENT({})", s),
1002 Token::Comment => write!(f, "COMMENT"),
1003 Token::Newline => write!(f, "NEWLINE"),
1004 Token::LineContinuation => write!(f, "LINECONT"),
1005 Token::InvalidFloatNoLeading => write!(f, "INVALID_FLOAT_NO_LEADING"),
1007 Token::InvalidFloatNoTrailing => write!(f, "INVALID_FLOAT_NO_TRAILING"),
1008 Token::BacktickRejected => write!(f, "BACKTICK_REJECTED"),
1009 }
1010 }
1011}
1012
1013impl Token {
1014 pub fn is_keyword(&self) -> bool {
1019 matches!(
1020 self,
1021 Token::Set
1022 | Token::Local
1023 | Token::If
1024 | Token::Then
1025 | Token::Else
1026 | Token::Elif
1027 | Token::Fi
1028 | Token::For
1029 | Token::In
1030 | Token::Do
1031 | Token::Done
1032 | Token::While
1033 | Token::Case
1034 | Token::Esac
1035 | Token::Function
1036 | Token::Return
1037 | Token::Break
1038 | Token::Continue
1039 | Token::Exit
1040 | Token::True
1041 | Token::False
1042 )
1043 }
1044
1045 pub fn is_type(&self) -> bool {
1047 matches!(
1048 self,
1049 Token::TypeString
1050 | Token::TypeInt
1051 | Token::TypeFloat
1052 | Token::TypeBool
1053 )
1054 }
1055
1056 pub fn starts_statement(&self) -> bool {
1059 matches!(
1060 self,
1061 Token::Set
1062 | Token::Local
1063 | Token::Function
1064 | Token::If
1065 | Token::For
1066 | Token::While
1067 | Token::Case
1068 | Token::Ident(_)
1069 | Token::LBracket
1070 )
1071 }
1072
1073 pub fn is_value(&self) -> bool {
1075 matches!(
1076 self,
1077 Token::String(_)
1078 | Token::SingleString(_)
1079 | Token::HereDoc(_)
1080 | Token::Arithmetic(_)
1081 | Token::Int(_)
1082 | Token::Float(_)
1083 | Token::True
1084 | Token::False
1085 | Token::VarRef(_)
1086 | Token::SimpleVarRef(_)
1087 | Token::CmdSubstStart
1088 | Token::Path(_)
1089 | Token::GlobWord(_)
1090 | Token::LastExitCode
1091 | Token::CurrentPid
1092 )
1093 }
1094}
1095
/// Output of `preprocess_arithmetic`.
struct ArithmeticPreprocessResult {
    /// Source text with every `$(( ... ))` replaced by a marker.
    text: String,
    /// `(marker, expression)` pairs, in the order the expressions were found.
    arithmetics: Vec<(String, String)>,
    /// One entry per marker, used to map preprocessed spans back to the source.
    replacements: Vec<SpanReplacement>,
}
1105
/// Copies one `$( ... )` command substitution verbatim into `result`.
///
/// On entry `chars[*i]` and `chars[*i + 1]` are expected to be `$` and `(`.
/// On return `*i` is just past the matching `)` (or at end of input when it is
/// unterminated). `*source_pos` is advanced by the UTF-8 byte length of
/// everything copied.
///
/// Parentheses are not counted inside single or double quotes. Outside
/// quotes a backslash protects the next character; inside double quotes it
/// only protects `"`, `\`, `$` and a backtick.
fn skip_command_substitution(
    chars: &[char],
    i: &mut usize,
    source_pos: &mut usize,
    result: &mut String,
) {
    enum Quoting {
        Bare,
        Single,
        Double,
    }

    // The opening `$(` is two one-byte characters.
    result.push_str("$(");
    *i += 2;
    *source_pos += 2;

    let mut open_parens: usize = 1;
    let mut quoting = Quoting::Bare;

    while open_parens > 0 {
        let Some(&current) = chars.get(*i) else { break };
        let lookahead = chars.get(*i + 1).copied();
        // Number of characters copied this iteration (2 for an escape pair).
        let mut take = 1usize;

        match quoting {
            Quoting::Single => {
                if current == '\'' {
                    quoting = Quoting::Bare;
                }
            }
            Quoting::Double => match (current, lookahead) {
                ('\\', Some('"' | '\\' | '$' | '`')) => take = 2,
                ('"', _) => quoting = Quoting::Bare,
                _ => {}
            },
            Quoting::Bare => match current {
                '\'' => quoting = Quoting::Single,
                '"' => quoting = Quoting::Double,
                '\\' if lookahead.is_some() => take = 2,
                '(' => open_parens += 1,
                ')' => open_parens -= 1,
                _ => {}
            },
        }

        for &copied in &chars[*i..*i + take] {
            result.push(copied);
            *source_pos += copied.len_utf8();
        }
        *i += take;
    }
}
1203
1204fn preprocess_arithmetic(source: &str) -> Result<ArithmeticPreprocessResult, LexerError> {
1218 let mut result = String::with_capacity(source.len());
1219 let mut arithmetics: Vec<(String, String)> = Vec::new();
1220 let mut replacements: Vec<SpanReplacement> = Vec::new();
1221 let mut source_pos: usize = 0;
1222 let chars_vec: Vec<char> = source.chars().collect();
1223 let mut i = 0;
1224
1225 let mut in_double_quote = false;
1228
1229 while i < chars_vec.len() {
1230 let ch = chars_vec[i];
1231
1232 if !in_double_quote && ch == '\\' && i + 1 < chars_vec.len() {
1234 result.push(ch);
1235 result.push(chars_vec[i + 1]);
1236 source_pos += ch.len_utf8() + chars_vec[i + 1].len_utf8();
1237 i += 2;
1238 continue;
1239 }
1240
1241 if ch == '\'' && !in_double_quote {
1243 result.push(ch);
1244 i += 1;
1245 source_pos += 1;
1246 while i < chars_vec.len() && chars_vec[i] != '\'' {
1247 result.push(chars_vec[i]);
1248 source_pos += chars_vec[i].len_utf8();
1249 i += 1;
1250 }
1251 if i < chars_vec.len() {
1252 result.push(chars_vec[i]); source_pos += 1;
1254 i += 1;
1255 }
1256 continue;
1257 }
1258
1259 if ch == '"' {
1261 in_double_quote = !in_double_quote;
1262 result.push(ch);
1263 i += 1;
1264 source_pos += 1;
1265 continue;
1266 }
1267
1268 if in_double_quote && ch == '\\' && i + 1 < chars_vec.len() {
1270 let next = chars_vec[i + 1];
1271 if next == '"' || next == '\\' || next == '$' || next == '`' {
1272 result.push(ch);
1273 result.push(next);
1274 source_pos += ch.len_utf8() + next.len_utf8();
1275 i += 2;
1276 continue;
1277 }
1278 }
1279
1280 if ch == '#' && !in_double_quote {
1288 while i < chars_vec.len() && chars_vec[i] != '\n' && chars_vec[i] != '\r' {
1289 result.push(chars_vec[i]);
1290 source_pos += chars_vec[i].len_utf8();
1291 i += 1;
1292 }
1293 continue;
1294 }
1295
1296 if ch == '$' && i + 1 < chars_vec.len() && chars_vec[i + 1] == '('
1298 && !(i + 2 < chars_vec.len() && chars_vec[i + 2] == '(')
1299 {
1300 skip_command_substitution(&chars_vec, &mut i, &mut source_pos, &mut result);
1301 continue;
1302 }
1303
1304 if ch == '$' && i + 2 < chars_vec.len() && chars_vec[i + 1] == '(' && chars_vec[i + 2] == '(' {
1306 let arith_start_pos = result.len();
1307 let original_start = source_pos;
1308
1309 i += 3;
1311 source_pos += 3;
1312
1313 let mut expr = String::new();
1315 let mut paren_depth: usize = 0;
1316
1317 while i < chars_vec.len() {
1318 let c = chars_vec[i];
1319 match c {
1320 '(' => {
1321 paren_depth += 1;
1322 if paren_depth > MAX_PAREN_DEPTH {
1323 return Err(LexerError::NestingTooDeep);
1324 }
1325 expr.push('(');
1326 i += 1;
1327 source_pos += c.len_utf8();
1328 }
1329 ')' => {
1330 if paren_depth > 0 {
1331 paren_depth -= 1;
1332 expr.push(')');
1333 i += 1;
1334 source_pos += 1;
1335 } else if i + 1 < chars_vec.len() && chars_vec[i + 1] == ')' {
1336 i += 2;
1338 source_pos += 2;
1339 break;
1340 } else {
1341 expr.push(')');
1343 i += 1;
1344 source_pos += 1;
1345 }
1346 }
1347 _ => {
1348 expr.push(c);
1349 i += 1;
1350 source_pos += c.len_utf8();
1351 }
1352 }
1353 }
1354
1355 let original_len = source_pos - original_start;
1357
1358 let marker = format!("__KAISH_ARITH_{}__", unique_marker_id());
1360 let marker_len = marker.len();
1361
1362 replacements.push(SpanReplacement {
1364 preprocessed_pos: arith_start_pos,
1365 marker_len,
1366 original_len,
1367 });
1368
1369 arithmetics.push((marker.clone(), expr));
1370 result.push_str(&marker);
1371 } else {
1372 result.push(ch);
1373 i += 1;
1374 source_pos += ch.len_utf8();
1375 }
1376 }
1377
1378 Ok(ArithmeticPreprocessResult {
1379 text: result,
1380 arithmetics,
1381 replacements,
1382 })
1383}
1384
/// One heredoc body lifted out of the source by `preprocess_heredocs`.
#[derive(Debug, Clone)]
struct HeredocReplacement {
    /// The `__KAISH_HEREDOC_<id>__` marker left in the text after `<<`.
    marker: String,
    /// Body lines exactly as written, each with its line terminator.
    body: String,
    /// True when the delimiter was written in single or double quotes.
    literal: bool,
    /// True for the `<<-` form.
    strip_tabs: bool,
    /// Byte offset in the source of the first body character.
    body_start_offset: usize,
}
1407
1408fn preprocess_heredocs(source: &str) -> Result<(String, Vec<HeredocReplacement>), Spanned<LexerError>> {
1420 let mut result = String::with_capacity(source.len());
1421 let mut heredocs: Vec<HeredocReplacement> = Vec::new();
1422 let chars_vec: Vec<char> = source.chars().collect();
1423 let mut i = 0;
1424 let mut pos: usize = 0;
1428
1429 while i < chars_vec.len() {
1430 let ch = chars_vec[i];
1431
1432 if ch == '<'
1436 && chars_vec.get(i + 1) == Some(&'<')
1437 && chars_vec.get(i + 2) == Some(&'<')
1438 {
1439 result.push_str("<<<");
1440 i += 3;
1441 pos += 3;
1442 continue;
1443 }
1444
1445 if ch == '<' && chars_vec.get(i + 1) == Some(&'<') {
1447 let introducer_start = pos;
1450 i += 2; pos += 2;
1452
1453 let strip_tabs = chars_vec.get(i) == Some(&'-');
1455 if strip_tabs {
1456 i += 1;
1457 pos += 1;
1458 }
1459
1460 while let Some(&c) = chars_vec.get(i) {
1462 if c == ' ' || c == '\t' {
1463 i += 1;
1464 pos += 1;
1465 } else {
1466 break;
1467 }
1468 }
1469
1470 let mut delimiter = String::new();
1472 let quoted = chars_vec.get(i) == Some(&'\'') || chars_vec.get(i) == Some(&'"');
1473 let quote_char = if quoted {
1474 let q = chars_vec.get(i).copied();
1475 i += 1;
1476 pos += 1;
1477 q
1478 } else {
1479 None
1480 };
1481
1482 while let Some(&c) = chars_vec.get(i) {
1483 if quoted {
1484 if Some(c) == quote_char {
1485 i += 1; pos += 1;
1487 break;
1488 }
1489 } else if c.is_whitespace() || c == '\n' || c == '\r' {
1490 break;
1491 }
1492 delimiter.push(c);
1493 i += 1;
1494 pos += c.len_utf8();
1495 }
1496
1497 if delimiter.is_empty() {
1498 result.push_str("<<");
1500 if strip_tabs {
1501 result.push('-');
1502 }
1503 continue;
1504 }
1505
1506 let mut after_delimiter = String::new();
1509 while let Some(&c) = chars_vec.get(i) {
1510 if c == '\n' {
1511 i += 1;
1512 pos += 1;
1513 break;
1514 } else if c == '\r' {
1515 i += 1;
1516 pos += 1;
1517 if chars_vec.get(i) == Some(&'\n') {
1518 i += 1;
1519 pos += 1;
1520 }
1521 break;
1522 }
1523 after_delimiter.push(c);
1524 i += 1;
1525 pos += c.len_utf8();
1526 }
1527
1528 let body_start_offset = pos;
1534 let mut content = String::new();
1535 let mut current_line = String::new();
1536
1537 loop {
1538 let next = chars_vec.get(i).copied();
1539 match next {
1540 Some('\n') => {
1541 i += 1;
1542 pos += 1;
1543 let trimmed = if strip_tabs {
1545 current_line.trim_start_matches('\t')
1546 } else {
1547 ¤t_line
1548 };
1549 if trimmed == delimiter {
1550 break;
1552 }
1553 content.push_str(¤t_line);
1555 content.push('\n');
1556 current_line.clear();
1557 }
1558 Some('\r') => {
1559 i += 1;
1560 pos += 1;
1561 let crlf = chars_vec.get(i) == Some(&'\n');
1567 if crlf {
1568 i += 1;
1569 pos += 1;
1570 }
1571 let trimmed = if strip_tabs {
1572 current_line.trim_start_matches('\t')
1573 } else {
1574 ¤t_line
1575 };
1576 if trimmed == delimiter {
1577 break;
1578 }
1579 content.push_str(¤t_line);
1580 content.push_str(if crlf { "\r\n" } else { "\r" });
1581 current_line.clear();
1582 }
1583 Some(c) => {
1584 current_line.push(c);
1585 i += 1;
1586 pos += c.len_utf8();
1587 }
1588 None => {
1589 let trimmed = if strip_tabs {
1592 current_line.trim_start_matches('\t')
1593 } else {
1594 ¤t_line
1595 };
1596 if trimmed == delimiter {
1597 break;
1598 }
1599 let span_end = introducer_start
1604 + 2
1605 + if strip_tabs { 1 } else { 0 }
1606 + delimiter.len();
1607 return Err(Spanned::new(
1608 LexerError::UnterminatedHeredoc {
1609 delimiter: delimiter.clone(),
1610 },
1611 introducer_start..span_end,
1612 ));
1613 }
1614 }
1615 }
1616
1617 let marker = format!("__KAISH_HEREDOC_{}__", unique_marker_id());
1619 heredocs.push(HeredocReplacement {
1620 marker: marker.clone(),
1621 body: content,
1622 literal: quoted,
1623 strip_tabs,
1624 body_start_offset,
1625 });
1626
1627 result.push_str("<<");
1630 result.push_str(&marker);
1631 result.push_str(&after_delimiter);
1632 result.push('\n');
1633 } else {
1634 result.push(ch);
1635 i += 1;
1636 pos += ch.len_utf8();
1637 }
1638 }
1639
1640 Ok((result, heredocs))
1641}
1642
1643fn mergeable_text(token: &Token) -> Option<String> {
1648 match token {
1649 Token::Ident(s) => Some(s.clone()),
1650 Token::NumberIdent(s) => Some(s.clone()),
1651 Token::DottedIdent(s) => Some(s.clone()),
1652 Token::Colon => Some(":".to_string()),
1653 Token::Int(n) => Some(n.to_string()),
1654 Token::Path(p) => Some(p.clone()),
1655 Token::Float(f) => Some(f.to_string()),
1656 _ => None,
1657 }
1658}
1659
1660fn merge_colon_adjacent(tokens: Vec<Spanned<Token>>) -> Vec<Spanned<Token>> {
1669 if tokens.is_empty() {
1670 return tokens;
1671 }
1672
1673 let mut result = Vec::with_capacity(tokens.len());
1674 let mut run: Vec<&Spanned<Token>> = Vec::new();
1675
1676 for token in &tokens {
1677 if run.is_empty() {
1678 if mergeable_text(&token.token).is_some() {
1679 run.push(token);
1680 } else {
1681 result.push(token.clone());
1682 }
1683 continue;
1684 }
1685
1686 let Some(last) = run.last() else { unreachable!() };
1689 let adjacent = last.span.end == token.span.start;
1690
1691 if adjacent && mergeable_text(&token.token).is_some() {
1692 run.push(token);
1693 } else {
1694 flush_colon_run(&mut run, &mut result);
1695 if mergeable_text(&token.token).is_some() {
1696 run.push(token);
1697 } else {
1698 result.push(token.clone());
1699 }
1700 }
1701 }
1702
1703 flush_colon_run(&mut run, &mut result);
1704
1705 result
1706}
1707
1708fn flush_colon_run(run: &mut Vec<&Spanned<Token>>, result: &mut Vec<Spanned<Token>>) {
1710 if run.is_empty() {
1711 return;
1712 }
1713
1714 let has_colon = run.iter().any(|t| matches!(t.token, Token::Colon));
1715
1716 if run.len() >= 2 && has_colon {
1717 let text: String = run
1718 .iter()
1719 .filter_map(|t| mergeable_text(&t.token))
1720 .collect();
1721 let start = run.first().map(|t| t.span.start).unwrap_or(0);
1723 let end = run.last().map(|t| t.span.end).unwrap_or(0);
1724 result.push(Spanned::new(Token::Ident(text), start..end));
1725 } else {
1726 for t in run.iter() {
1727 result.push((*t).clone());
1728 }
1729 }
1730
1731 run.clear();
1732}
1733
1734fn glob_mergeable_text(token: &Token) -> Option<String> {
1739 match token {
1740 Token::Star => Some("*".to_string()),
1741 Token::Question => Some("?".to_string()),
1742 Token::Dot => Some(".".to_string()),
1743 Token::DotDot => Some("..".to_string()),
1744 Token::Ident(s) => Some(s.clone()),
1745 Token::NumberIdent(s) => Some(s.clone()),
1746 Token::DottedIdent(s) => Some(s.clone()),
1747 Token::Path(s) => Some(s.clone()),
1748 Token::Int(n) => Some(n.to_string()),
1749 Token::LBracket => Some("[".to_string()),
1750 Token::RBracket => Some("]".to_string()),
1751 Token::Bang => Some("!".to_string()),
1752 Token::DotSlashPath(s) => Some(s.clone()),
1753 Token::RelativePath(s) => Some(s.clone()),
1754 Token::TildePath(s) => Some(s.clone()),
1755 Token::Tilde => Some("~".to_string()),
1756 Token::LBrace => Some("{".to_string()),
1757 Token::RBrace => Some("}".to_string()),
1758 Token::Comma => Some(",".to_string()),
1759 _ => None,
1760 }
1761}
1762
1763fn merge_glob_adjacent(tokens: Vec<Spanned<Token>>) -> Vec<Spanned<Token>> {
1771 if tokens.is_empty() {
1772 return tokens;
1773 }
1774
1775 let mut result = Vec::with_capacity(tokens.len());
1776 let mut run: Vec<&Spanned<Token>> = Vec::new();
1777
1778 for token in &tokens {
1779 if run.is_empty() {
1780 if glob_mergeable_text(&token.token).is_some() {
1781 run.push(token);
1782 } else {
1783 result.push(token.clone());
1784 }
1785 continue;
1786 }
1787
1788 let Some(last) = run.last() else { unreachable!() };
1790 let adjacent = last.span.end == token.span.start;
1791
1792 if adjacent && glob_mergeable_text(&token.token).is_some() {
1793 run.push(token);
1794 } else {
1795 flush_glob_run(&mut run, &mut result);
1796 if glob_mergeable_text(&token.token).is_some() {
1797 run.push(token);
1798 } else {
1799 result.push(token.clone());
1800 }
1801 }
1802 }
1803
1804 flush_glob_run(&mut run, &mut result);
1805
1806 result
1807}
1808
1809fn flush_glob_run(run: &mut Vec<&Spanned<Token>>, result: &mut Vec<Spanned<Token>>) {
1811 if run.is_empty() {
1812 return;
1813 }
1814
1815 let has_glob = run.iter().any(|t| {
1816 matches!(t.token, Token::Star | Token::Question)
1817 }) || (run.iter().any(|t| matches!(t.token, Token::LBracket))
1818 && run.iter().any(|t| matches!(t.token, Token::RBracket)));
1819
1820 if run.len() >= 2 && has_glob {
1821 let text: String = run
1822 .iter()
1823 .filter_map(|t| glob_mergeable_text(&t.token))
1824 .collect();
1825 let start = run.first().map(|t| t.span.start).unwrap_or(0);
1826 let end = run.last().map(|t| t.span.end).unwrap_or(0);
1827 result.push(Spanned::new(Token::GlobWord(text), start..end));
1828 } else {
1829 for t in run.iter() {
1830 result.push((*t).clone());
1831 }
1832 }
1833
1834 run.clear();
1835}
1836
1837pub fn tokenize(source: &str) -> Result<Vec<Spanned<Token>>, Vec<Spanned<LexerError>>> {
1847 let arith_result = preprocess_arithmetic(source)
1849 .map_err(|e| vec![Spanned::new(e, 0..source.len())])?;
1850
1851 let span_replacements = arith_result.replacements;
1855 let (preprocessed, heredocs) = preprocess_heredocs(&arith_result.text)
1856 .map_err(|e| {
1857 let span = correct_span(e.span, &span_replacements);
1858 vec![Spanned::new(e.token, span)]
1859 })?;
1860
1861 let lexer = Token::lexer(&preprocessed);
1862 let mut tokens = Vec::new();
1863 let mut errors = Vec::new();
1864
1865 for (result, span) in lexer.spanned() {
1866 let corrected_span = correct_span(span, &span_replacements);
1868 match result {
1869 Ok(token) => {
1870 if !matches!(token, Token::Comment | Token::LineContinuation) {
1872 tokens.push(Spanned::new(token, corrected_span));
1873 }
1874 }
1875 Err(err) => {
1876 errors.push(Spanned::new(err, corrected_span));
1877 }
1878 }
1879 }
1880
1881 if !errors.is_empty() {
1882 return Err(errors);
1883 }
1884
1885 let mut final_tokens = Vec::with_capacity(tokens.len());
1887 let mut i = 0;
1888
1889 while i < tokens.len() {
1890 if let Token::Ident(ref name) = tokens[i].token
1892 && name.starts_with("__KAISH_ARITH_") && name.ends_with("__")
1893 && let Some((_, expr)) = arith_result.arithmetics.iter().find(|(marker, _)| marker == name) {
1894 final_tokens.push(Spanned::new(Token::Arithmetic(expr.clone()), tokens[i].span.clone()));
1895 i += 1;
1896 continue;
1897 }
1898
1899 if matches!(tokens[i].token, Token::HereDocStart) {
1901 if i + 1 < tokens.len()
1903 && let Token::Ident(ref name) = tokens[i + 1].token
1904 && name.starts_with("__KAISH_HEREDOC_") && name.ends_with("__") {
1905 if let Some(hd) = heredocs.iter().find(|h| h.marker == *name) {
1907 let mut content = hd.body.clone();
1919 for (marker, expr) in &arith_result.arithmetics {
1920 if content.contains(marker) {
1921 let replacement = if hd.literal {
1922 format!("$(({}))", expr)
1923 } else {
1924 format!("${{__ARITH:{}__}}", expr)
1925 };
1926 content = content.replace(marker, &replacement);
1927 }
1928 }
1929 final_tokens.push(Spanned::new(Token::HereDocStart, tokens[i].span.clone()));
1930 final_tokens.push(Spanned::new(
1931 Token::HereDoc(HereDocData {
1932 content,
1933 literal: hd.literal,
1934 strip_tabs: hd.strip_tabs,
1935 body_start_offset: hd.body_start_offset,
1936 }),
1937 tokens[i + 1].span.clone(),
1938 ));
1939 i += 2;
1940 continue;
1941 }
1942 }
1943 }
1944
1945 let token = if let Token::String(ref s) = tokens[i].token {
1947 let mut new_content = s.clone();
1949 for (marker, expr) in &arith_result.arithmetics {
1950 if new_content.contains(marker) {
1951 new_content = new_content.replace(marker, &format!("${{__ARITH:{}__}}", expr));
1954 }
1955 }
1956 if new_content != *s {
1957 Spanned::new(Token::String(new_content), tokens[i].span.clone())
1958 } else {
1959 tokens[i].clone()
1960 }
1961 } else {
1962 tokens[i].clone()
1963 };
1964 final_tokens.push(token);
1965 i += 1;
1966 }
1967
1968 Ok(merge_glob_adjacent(merge_colon_adjacent(final_tokens)))
1969}
1970
1971pub fn tokenize_with_comments(source: &str) -> Result<Vec<Spanned<Token>>, Vec<Spanned<LexerError>>> {
1975 let lexer = Token::lexer(source);
1976 let mut tokens = Vec::new();
1977 let mut errors = Vec::new();
1978
1979 for (result, span) in lexer.spanned() {
1980 match result {
1981 Ok(token) => {
1982 tokens.push(Spanned::new(token, span));
1983 }
1984 Err(err) => {
1985 errors.push(Spanned::new(err, span));
1986 }
1987 }
1988 }
1989
1990 if errors.is_empty() {
1991 Ok(tokens)
1992 } else {
1993 Err(errors)
1994 }
1995}
1996
1997pub fn parse_string_literal(source: &str) -> Result<String, LexerError> {
1999 if source.len() < 2 || !source.starts_with('"') || !source.ends_with('"') {
2001 return Err(LexerError::UnterminatedString);
2002 }
2003
2004 let inner = &source[1..source.len() - 1];
2005 let mut result = String::with_capacity(inner.len());
2006 let mut chars = inner.chars().peekable();
2007
2008 while let Some(ch) = chars.next() {
2009 if ch == '\\' {
2010 match chars.next() {
2011 Some('n') => result.push('\n'),
2012 Some('t') => result.push('\t'),
2013 Some('r') => result.push('\r'),
2014 Some('\\') => result.push('\\'),
2015 Some('"') => result.push('"'),
2016 Some('$') => result.push_str("__KAISH_ESCAPED_DOLLAR__"),
2019 Some('u') => {
2020 let mut hex = String::with_capacity(4);
2022 for _ in 0..4 {
2023 match chars.next() {
2024 Some(h) if h.is_ascii_hexdigit() => hex.push(h),
2025 _ => return Err(LexerError::InvalidEscape),
2026 }
2027 }
2028 let codepoint = u32::from_str_radix(&hex, 16)
2029 .map_err(|_| LexerError::InvalidEscape)?;
2030 let ch = char::from_u32(codepoint)
2031 .ok_or(LexerError::InvalidEscape)?;
2032 result.push(ch);
2033 }
2034 Some(next) => {
2036 result.push('\\');
2037 result.push(next);
2038 }
2039 None => return Err(LexerError::InvalidEscape),
2040 }
2041 } else {
2042 result.push(ch);
2043 }
2044 }
2045
2046 Ok(result)
2047}
2048
2049pub fn parse_var_ref(source: &str) -> Result<Vec<String>, LexerError> {
2052 if source.len() < 4 || !source.starts_with("${") || !source.ends_with('}') {
2054 return Err(LexerError::UnterminatedVarRef);
2055 }
2056
2057 let inner = &source[2..source.len() - 1];
2058
2059 if inner == "?" {
2061 return Ok(vec!["?".to_string()]);
2062 }
2063
2064 let mut segments = Vec::new();
2065 let mut current = String::new();
2066 let mut chars = inner.chars().peekable();
2067
2068 while let Some(ch) = chars.next() {
2069 match ch {
2070 '.' => {
2071 if !current.is_empty() {
2072 segments.push(current.clone());
2073 current.clear();
2074 }
2075 }
2076 '[' => {
2077 if !current.is_empty() {
2078 segments.push(current.clone());
2079 current.clear();
2080 }
2081 let mut index = String::from("[");
2083 while let Some(&c) = chars.peek() {
2084 if let Some(c) = chars.next() {
2085 index.push(c);
2086 }
2087 if c == ']' {
2088 break;
2089 }
2090 }
2091 segments.push(index);
2092 }
2093 _ => {
2094 current.push(ch);
2095 }
2096 }
2097 }
2098
2099 if !current.is_empty() {
2100 segments.push(current);
2101 }
2102
2103 Ok(segments)
2104}
2105
2106pub fn parse_int(source: &str) -> Result<i64, LexerError> {
2108 source.parse().map_err(|_| LexerError::InvalidNumber)
2109}
2110
2111pub fn parse_float(source: &str) -> Result<f64, LexerError> {
2113 source.parse().map_err(|_| LexerError::InvalidNumber)
2114}
2115
2116#[cfg(test)]
2117mod tests {
2118 use super::*;
2119
2120 fn lex(source: &str) -> Vec<Token> {
2121 tokenize(source)
2122 .expect("lexer should succeed")
2123 .into_iter()
2124 .map(|s| s.token)
2125 .collect()
2126 }
2127
2128 #[test]
2133 fn keywords() {
2134 assert_eq!(lex("set"), vec![Token::Set]);
2135 assert_eq!(lex("if"), vec![Token::If]);
2136 assert_eq!(lex("then"), vec![Token::Then]);
2137 assert_eq!(lex("else"), vec![Token::Else]);
2138 assert_eq!(lex("elif"), vec![Token::Elif]);
2139 assert_eq!(lex("fi"), vec![Token::Fi]);
2140 assert_eq!(lex("for"), vec![Token::For]);
2141 assert_eq!(lex("in"), vec![Token::In]);
2142 assert_eq!(lex("do"), vec![Token::Do]);
2143 assert_eq!(lex("done"), vec![Token::Done]);
2144 assert_eq!(lex("case"), vec![Token::Case]);
2145 assert_eq!(lex("esac"), vec![Token::Esac]);
2146 assert_eq!(lex("function"), vec![Token::Function]);
2147 assert_eq!(lex("true"), vec![Token::True]);
2148 assert_eq!(lex("false"), vec![Token::False]);
2149 }
2150
2151 #[test]
2152 fn double_semicolon() {
2153 assert_eq!(lex(";;"), vec![Token::DoubleSemi]);
2154 assert_eq!(lex("echo \"hi\";;"), vec![
2156 Token::Ident("echo".to_string()),
2157 Token::String("hi".to_string()),
2158 Token::DoubleSemi,
2159 ]);
2160 }
2161
2162 #[test]
2163 fn type_keywords() {
2164 assert_eq!(lex("string"), vec![Token::TypeString]);
2165 assert_eq!(lex("int"), vec![Token::TypeInt]);
2166 assert_eq!(lex("float"), vec![Token::TypeFloat]);
2167 assert_eq!(lex("bool"), vec![Token::TypeBool]);
2168 }
2169
2170 #[test]
2175 fn single_char_operators() {
2176 assert_eq!(lex("="), vec![Token::Eq]);
2177 assert_eq!(lex("|"), vec![Token::Pipe]);
2178 assert_eq!(lex("&"), vec![Token::Amp]);
2179 assert_eq!(lex(">"), vec![Token::Gt]);
2180 assert_eq!(lex("<"), vec![Token::Lt]);
2181 assert_eq!(lex(";"), vec![Token::Semi]);
2182 assert_eq!(lex(":"), vec![Token::Colon]);
2183 assert_eq!(lex(","), vec![Token::Comma]);
2184 assert_eq!(lex("."), vec![Token::Dot]);
2185 }
2186
2187 #[test]
2188 fn multi_char_operators() {
2189 assert_eq!(lex("&&"), vec![Token::And]);
2190 assert_eq!(lex("||"), vec![Token::Or]);
2191 assert_eq!(lex("=="), vec![Token::EqEq]);
2192 assert_eq!(lex("!="), vec![Token::NotEq]);
2193 assert_eq!(lex("=~"), vec![Token::Match]);
2194 assert_eq!(lex("!~"), vec![Token::NotMatch]);
2195 assert_eq!(lex(">="), vec![Token::GtEq]);
2196 assert_eq!(lex("<="), vec![Token::LtEq]);
2197 assert_eq!(lex(">>"), vec![Token::GtGt]);
2198 assert_eq!(lex("2>"), vec![Token::Stderr]);
2199 assert_eq!(lex("&>"), vec![Token::Both]);
2200 }
2201
2202 #[test]
2203 fn brackets() {
2204 assert_eq!(lex("{"), vec![Token::LBrace]);
2205 assert_eq!(lex("}"), vec![Token::RBrace]);
2206 assert_eq!(lex("["), vec![Token::LBracket]);
2207 assert_eq!(lex("]"), vec![Token::RBracket]);
2208 assert_eq!(lex("("), vec![Token::LParen]);
2209 assert_eq!(lex(")"), vec![Token::RParen]);
2210 }
2211
2212 #[test]
2217 fn integers() {
2218 assert_eq!(lex("0"), vec![Token::Int(0)]);
2219 assert_eq!(lex("42"), vec![Token::Int(42)]);
2220 assert_eq!(lex("-1"), vec![Token::Int(-1)]);
2221 assert_eq!(lex("999999"), vec![Token::Int(999999)]);
2222 }
2223
2224 #[test]
2225 fn floats() {
2226 assert_eq!(lex("3.14"), vec![Token::Float(3.14)]);
2227 assert_eq!(lex("-0.5"), vec![Token::Float(-0.5)]);
2228 assert_eq!(lex("123.456"), vec![Token::Float(123.456)]);
2229 }
2230
2231 #[test]
2232 fn strings() {
2233 assert_eq!(lex(r#""hello""#), vec![Token::String("hello".to_string())]);
2234 assert_eq!(lex(r#""hello world""#), vec![Token::String("hello world".to_string())]);
2235 assert_eq!(lex(r#""""#), vec![Token::String("".to_string())]); assert_eq!(lex(r#""with \"quotes\"""#), vec![Token::String("with \"quotes\"".to_string())]);
2237 assert_eq!(lex(r#""with\nnewline""#), vec![Token::String("with\nnewline".to_string())]);
2238 }
2239
2240 #[test]
2241 fn var_refs() {
2242 assert_eq!(lex("${X}"), vec![Token::VarRef("${X}".to_string())]);
2243 assert_eq!(lex("${VAR}"), vec![Token::VarRef("${VAR}".to_string())]);
2244 assert_eq!(lex("${VAR.field}"), vec![Token::VarRef("${VAR.field}".to_string())]);
2245 assert_eq!(lex("${VAR[0]}"), vec![Token::VarRef("${VAR[0]}".to_string())]);
2246 }
2247
2248 #[test]
2253 fn identifiers() {
2254 assert_eq!(lex("foo"), vec![Token::Ident("foo".to_string())]);
2255 assert_eq!(lex("foo_bar"), vec![Token::Ident("foo_bar".to_string())]);
2256 assert_eq!(lex("foo-bar"), vec![Token::Ident("foo-bar".to_string())]);
2257 assert_eq!(lex("_private"), vec![Token::Ident("_private".to_string())]);
2258 assert_eq!(lex("cmd123"), vec![Token::Ident("cmd123".to_string())]);
2259 }
2260
2261 #[test]
2262 fn keyword_prefix_identifiers() {
2263 assert_eq!(lex("setup"), vec![Token::Ident("setup".to_string())]);
2265 assert_eq!(lex("kaish-tools"), vec![Token::Ident("kaish-tools".to_string())]);
2266 assert_eq!(lex("iffy"), vec![Token::Ident("iffy".to_string())]);
2267 assert_eq!(lex("forked"), vec![Token::Ident("forked".to_string())]);
2268 assert_eq!(lex("done-with-it"), vec![Token::Ident("done-with-it".to_string())]);
2269 }
2270
2271 #[test]
2276 fn assignment() {
2277 assert_eq!(
2278 lex("set X = 5"),
2279 vec![Token::Set, Token::Ident("X".to_string()), Token::Eq, Token::Int(5)]
2280 );
2281 }
2282
2283 #[test]
2284 fn command_simple() {
2285 assert_eq!(lex("echo"), vec![Token::Ident("echo".to_string())]);
2286 assert_eq!(
2287 lex(r#"echo "hello""#),
2288 vec![Token::Ident("echo".to_string()), Token::String("hello".to_string())]
2289 );
2290 }
2291
2292 #[test]
2293 fn command_with_args() {
2294 assert_eq!(
2295 lex("cmd arg1 arg2"),
2296 vec![Token::Ident("cmd".to_string()), Token::Ident("arg1".to_string()), Token::Ident("arg2".to_string())]
2297 );
2298 }
2299
2300 #[test]
2301 fn command_with_named_args() {
2302 assert_eq!(
2303 lex("cmd key=value"),
2304 vec![Token::Ident("cmd".to_string()), Token::Ident("key".to_string()), Token::Eq, Token::Ident("value".to_string())]
2305 );
2306 }
2307
2308 #[test]
2309 fn pipeline() {
2310 assert_eq!(
2311 lex("a | b | c"),
2312 vec![Token::Ident("a".to_string()), Token::Pipe, Token::Ident("b".to_string()), Token::Pipe, Token::Ident("c".to_string())]
2313 );
2314 }
2315
2316 #[test]
2317 fn if_statement() {
2318 assert_eq!(
2319 lex("if true; then echo; fi"),
2320 vec![
2321 Token::If,
2322 Token::True,
2323 Token::Semi,
2324 Token::Then,
2325 Token::Ident("echo".to_string()),
2326 Token::Semi,
2327 Token::Fi
2328 ]
2329 );
2330 }
2331
2332 #[test]
2333 fn for_loop() {
2334 assert_eq!(
2335 lex("for X in items; do echo; done"),
2336 vec![
2337 Token::For,
2338 Token::Ident("X".to_string()),
2339 Token::In,
2340 Token::Ident("items".to_string()),
2341 Token::Semi,
2342 Token::Do,
2343 Token::Ident("echo".to_string()),
2344 Token::Semi,
2345 Token::Done
2346 ]
2347 );
2348 }
2349
2350 #[test]
2355 fn whitespace_ignored() {
2356 assert_eq!(lex(" set X = 5 "), lex("set X = 5"));
2357 }
2358
2359 #[test]
2360 fn newlines_preserved() {
2361 let tokens = lex("a\nb");
2362 assert_eq!(
2363 tokens,
2364 vec![Token::Ident("a".to_string()), Token::Newline, Token::Ident("b".to_string())]
2365 );
2366 }
2367
2368 #[test]
2369 fn multiple_newlines() {
2370 let tokens = lex("a\n\n\nb");
2371 assert_eq!(
2372 tokens,
2373 vec![Token::Ident("a".to_string()), Token::Newline, Token::Newline, Token::Newline, Token::Ident("b".to_string())]
2374 );
2375 }
2376
2377 #[test]
2382 fn comments_skipped() {
2383 assert_eq!(lex("# comment"), vec![]);
2384 assert_eq!(lex("a # comment"), vec![Token::Ident("a".to_string())]);
2385 assert_eq!(
2386 lex("a # comment\nb"),
2387 vec![Token::Ident("a".to_string()), Token::Newline, Token::Ident("b".to_string())]
2388 );
2389 }
2390
2391 #[test]
2392 fn comments_preserved_when_requested() {
2393 let tokens = tokenize_with_comments("a # comment")
2394 .expect("should succeed")
2395 .into_iter()
2396 .map(|s| s.token)
2397 .collect::<Vec<_>>();
2398 assert_eq!(tokens, vec![Token::Ident("a".to_string()), Token::Comment]);
2399 }
2400
2401 #[test]
2406 fn parse_simple_string() {
2407 assert_eq!(parse_string_literal(r#""hello""#).expect("ok"), "hello");
2408 }
2409
2410 #[test]
2411 fn parse_string_with_escapes() {
2412 assert_eq!(
2413 parse_string_literal(r#""hello\nworld""#).expect("ok"),
2414 "hello\nworld"
2415 );
2416 assert_eq!(
2417 parse_string_literal(r#""tab\there""#).expect("ok"),
2418 "tab\there"
2419 );
2420 assert_eq!(
2421 parse_string_literal(r#""quote\"here""#).expect("ok"),
2422 "quote\"here"
2423 );
2424 }
2425
2426 #[test]
2427 fn parse_string_with_unicode() {
2428 assert_eq!(
2429 parse_string_literal(r#""emoji \u2764""#).expect("ok"),
2430 "emoji ❤"
2431 );
2432 }
2433
2434 #[test]
2435 fn parse_string_with_escaped_dollar() {
2436 assert_eq!(
2439 parse_string_literal(r#""\$VAR""#).expect("ok"),
2440 "__KAISH_ESCAPED_DOLLAR__VAR"
2441 );
2442 assert_eq!(
2443 parse_string_literal(r#""cost: \$100""#).expect("ok"),
2444 "cost: __KAISH_ESCAPED_DOLLAR__100"
2445 );
2446 }
2447
2448 #[test]
2453 fn parse_simple_var() {
2454 assert_eq!(
2455 parse_var_ref("${X}").expect("ok"),
2456 vec!["X"]
2457 );
2458 }
2459
2460 #[test]
2461 fn parse_var_with_field() {
2462 assert_eq!(
2463 parse_var_ref("${VAR.field}").expect("ok"),
2464 vec!["VAR", "field"]
2465 );
2466 }
2467
2468 #[test]
2469 fn parse_var_with_index() {
2470 assert_eq!(
2471 parse_var_ref("${VAR[0]}").expect("ok"),
2472 vec!["VAR", "[0]"]
2473 );
2474 }
2475
2476 #[test]
2477 fn parse_var_nested() {
2478 assert_eq!(
2479 parse_var_ref("${VAR.field[0].nested}").expect("ok"),
2480 vec!["VAR", "field", "[0]", "nested"]
2481 );
2482 }
2483
2484 #[test]
2485 fn parse_last_result() {
2486 assert_eq!(
2487 parse_var_ref("${?}").expect("ok"),
2488 vec!["?"]
2489 );
2490 }
2491
2492 #[test]
2497 fn parse_integers() {
2498 assert_eq!(parse_int("0").expect("ok"), 0);
2499 assert_eq!(parse_int("42").expect("ok"), 42);
2500 assert_eq!(parse_int("-1").expect("ok"), -1);
2501 }
2502
2503 #[test]
2504 fn parse_floats() {
2505 assert!((parse_float("3.14").expect("ok") - 3.14).abs() < f64::EPSILON);
2506 assert!((parse_float("-0.5").expect("ok") - (-0.5)).abs() < f64::EPSILON);
2507 }
2508
2509 #[test]
2514 fn empty_input() {
2515 assert_eq!(lex(""), vec![]);
2516 }
2517
2518 #[test]
2519 fn only_whitespace() {
2520 assert_eq!(lex(" \t\t "), vec![]);
2521 }
2522
2523 #[test]
2524 fn json_array() {
2525 assert_eq!(
2526 lex(r#"[1, 2, 3]"#),
2527 vec![
2528 Token::LBracket,
2529 Token::Int(1),
2530 Token::Comma,
2531 Token::Int(2),
2532 Token::Comma,
2533 Token::Int(3),
2534 Token::RBracket
2535 ]
2536 );
2537 }
2538
2539 #[test]
2540 fn json_object() {
2541 assert_eq!(
2542 lex(r#"{"key": "value"}"#),
2543 vec![
2544 Token::LBrace,
2545 Token::String("key".to_string()),
2546 Token::Colon,
2547 Token::String("value".to_string()),
2548 Token::RBrace
2549 ]
2550 );
2551 }
2552
2553 #[test]
2554 fn redirect_operators() {
2555 assert_eq!(
2556 lex("cmd > file"),
2557 vec![Token::Ident("cmd".to_string()), Token::Gt, Token::Ident("file".to_string())]
2558 );
2559 assert_eq!(
2560 lex("cmd >> file"),
2561 vec![Token::Ident("cmd".to_string()), Token::GtGt, Token::Ident("file".to_string())]
2562 );
2563 assert_eq!(
2564 lex("cmd 2> err"),
2565 vec![Token::Ident("cmd".to_string()), Token::Stderr, Token::Ident("err".to_string())]
2566 );
2567 assert_eq!(
2568 lex("cmd &> all"),
2569 vec![Token::Ident("cmd".to_string()), Token::Both, Token::Ident("all".to_string())]
2570 );
2571 }
2572
2573 #[test]
2574 fn background_job() {
2575 assert_eq!(
2576 lex("cmd &"),
2577 vec![Token::Ident("cmd".to_string()), Token::Amp]
2578 );
2579 }
2580
2581 #[test]
2582 fn command_substitution() {
2583 assert_eq!(
2584 lex("$(cmd)"),
2585 vec![Token::CmdSubstStart, Token::Ident("cmd".to_string()), Token::RParen]
2586 );
2587 assert_eq!(
2588 lex("$(cmd arg)"),
2589 vec![
2590 Token::CmdSubstStart,
2591 Token::Ident("cmd".to_string()),
2592 Token::Ident("arg".to_string()),
2593 Token::RParen
2594 ]
2595 );
2596 assert_eq!(
2597 lex("$(a | b)"),
2598 vec![
2599 Token::CmdSubstStart,
2600 Token::Ident("a".to_string()),
2601 Token::Pipe,
2602 Token::Ident("b".to_string()),
2603 Token::RParen
2604 ]
2605 );
2606 }
2607
2608 #[test]
2609 fn complex_pipeline() {
2610 assert_eq!(
2611 lex(r#"cat file | grep pattern="foo" | head count=10"#),
2612 vec![
2613 Token::Ident("cat".to_string()),
2614 Token::Ident("file".to_string()),
2615 Token::Pipe,
2616 Token::Ident("grep".to_string()),
2617 Token::Ident("pattern".to_string()),
2618 Token::Eq,
2619 Token::String("foo".to_string()),
2620 Token::Pipe,
2621 Token::Ident("head".to_string()),
2622 Token::Ident("count".to_string()),
2623 Token::Eq,
2624 Token::Int(10),
2625 ]
2626 );
2627 }
2628
2629 #[test]
2634 fn short_flag() {
2635 assert_eq!(lex("-l"), vec![Token::ShortFlag("l".to_string())]);
2636 assert_eq!(lex("-a"), vec![Token::ShortFlag("a".to_string())]);
2637 assert_eq!(lex("-v"), vec![Token::ShortFlag("v".to_string())]);
2638 }
2639
2640 #[test]
2641 fn short_flag_combined() {
2642 assert_eq!(lex("-la"), vec![Token::ShortFlag("la".to_string())]);
2644 assert_eq!(lex("-vvv"), vec![Token::ShortFlag("vvv".to_string())]);
2645 }
2646
2647 #[test]
2648 fn job_spec_lexes_as_one_token() {
2649 assert_eq!(lex("%1"), vec![Token::JobSpec("%1".to_string())]);
2651 assert_eq!(lex("%12"), vec![Token::JobSpec("%12".to_string())]);
2652 assert_eq!(
2653 lex("wait %1 %2"),
2654 vec![
2655 Token::Ident("wait".to_string()),
2656 Token::JobSpec("%1".to_string()),
2657 Token::JobSpec("%2".to_string()),
2658 ]
2659 );
2660 }
2661
2662 #[test]
2663 fn short_flag_with_internal_hyphens_is_one_token() {
2664 assert_eq!(
2668 lex("-not-a-flag"),
2669 vec![Token::ShortFlag("not-a-flag".to_string())]
2670 );
2671 assert_eq!(lex("--"), vec![Token::DoubleDash]);
2675 assert_eq!(lex("-"), vec![Token::MinusAlone]);
2676 }
2677
2678 #[test]
2679 fn long_flag() {
2680 assert_eq!(lex("--force"), vec![Token::LongFlag("force".to_string())]);
2681 assert_eq!(lex("--verbose"), vec![Token::LongFlag("verbose".to_string())]);
2682 assert_eq!(lex("--foo-bar"), vec![Token::LongFlag("foo-bar".to_string())]);
2683 }
2684
2685 #[test]
2686 fn double_dash() {
2687 assert_eq!(lex("--"), vec![Token::DoubleDash]);
2689 }
2690
2691 #[test]
2692 fn flags_vs_negative_numbers() {
2693 assert_eq!(lex("-123"), vec![Token::Int(-123)]);
2695 assert_eq!(lex("-l"), vec![Token::ShortFlag("l".to_string())]);
2697 assert_eq!(
2700 lex("-1 a"),
2701 vec![Token::Int(-1), Token::Ident("a".to_string())]
2702 );
2703 }
2704
2705 #[test]
2706 fn command_with_flags() {
2707 assert_eq!(
2708 lex("ls -l"),
2709 vec![
2710 Token::Ident("ls".to_string()),
2711 Token::ShortFlag("l".to_string()),
2712 ]
2713 );
2714 assert_eq!(
2715 lex("git commit -m"),
2716 vec![
2717 Token::Ident("git".to_string()),
2718 Token::Ident("commit".to_string()),
2719 Token::ShortFlag("m".to_string()),
2720 ]
2721 );
2722 assert_eq!(
2723 lex("git push --force"),
2724 vec![
2725 Token::Ident("git".to_string()),
2726 Token::Ident("push".to_string()),
2727 Token::LongFlag("force".to_string()),
2728 ]
2729 );
2730 }
2731
2732 #[test]
2733 fn flag_with_value() {
2734 assert_eq!(
2735 lex(r#"git commit -m "message""#),
2736 vec![
2737 Token::Ident("git".to_string()),
2738 Token::Ident("commit".to_string()),
2739 Token::ShortFlag("m".to_string()),
2740 Token::String("message".to_string()),
2741 ]
2742 );
2743 assert_eq!(
2744 lex(r#"--message="hello""#),
2745 vec![
2746 Token::LongFlag("message".to_string()),
2747 Token::Eq,
2748 Token::String("hello".to_string()),
2749 ]
2750 );
2751 }
2752
2753 #[test]
2754 fn end_of_flags_marker() {
2755 assert_eq!(
2756 lex("git checkout -- file"),
2757 vec![
2758 Token::Ident("git".to_string()),
2759 Token::Ident("checkout".to_string()),
2760 Token::DoubleDash,
2761 Token::Ident("file".to_string()),
2762 ]
2763 );
2764 }
2765
2766 #[test]
2771 fn local_keyword() {
2772 assert_eq!(lex("local"), vec![Token::Local]);
2773 assert_eq!(
2774 lex("local X = 5"),
2775 vec![Token::Local, Token::Ident("X".to_string()), Token::Eq, Token::Int(5)]
2776 );
2777 }
2778
2779 #[test]
2780 fn simple_var_ref() {
2781 assert_eq!(lex("$X"), vec![Token::SimpleVarRef("X".to_string())]);
2782 assert_eq!(lex("$foo"), vec![Token::SimpleVarRef("foo".to_string())]);
2783 assert_eq!(lex("$foo_bar"), vec![Token::SimpleVarRef("foo_bar".to_string())]);
2784 assert_eq!(lex("$_private"), vec![Token::SimpleVarRef("_private".to_string())]);
2785 }
2786
2787 #[test]
2788 fn simple_var_ref_in_command() {
2789 assert_eq!(
2790 lex("echo $NAME"),
2791 vec![Token::Ident("echo".to_string()), Token::SimpleVarRef("NAME".to_string())]
2792 );
2793 }
2794
2795 #[test]
2796 fn single_quoted_strings() {
2797 assert_eq!(lex("'hello'"), vec![Token::SingleString("hello".to_string())]);
2798 assert_eq!(lex("'hello world'"), vec![Token::SingleString("hello world".to_string())]);
2799 assert_eq!(lex("''"), vec![Token::SingleString("".to_string())]);
2800 assert_eq!(lex(r"'no $VAR here'"), vec![Token::SingleString("no $VAR here".to_string())]);
2802 assert_eq!(lex(r"'backslash \n stays'"), vec![Token::SingleString(r"backslash \n stays".to_string())]);
2803 }
2804
2805 #[test]
2806 fn test_brackets() {
2807 assert_eq!(lex("[["), vec![Token::LBracket, Token::LBracket]);
2809 assert_eq!(lex("]]"), vec![Token::RBracket, Token::RBracket]);
2810 assert_eq!(
2811 lex("[[ -f file ]]"),
2812 vec![
2813 Token::LBracket,
2814 Token::LBracket,
2815 Token::ShortFlag("f".to_string()),
2816 Token::Ident("file".to_string()),
2817 Token::RBracket,
2818 Token::RBracket
2819 ]
2820 );
2821 }
2822
2823 #[test]
2824 fn test_expression_syntax() {
2825 assert_eq!(
2826 lex(r#"[[ $X == "value" ]]"#),
2827 vec![
2828 Token::LBracket,
2829 Token::LBracket,
2830 Token::SimpleVarRef("X".to_string()),
2831 Token::EqEq,
2832 Token::String("value".to_string()),
2833 Token::RBracket,
2834 Token::RBracket
2835 ]
2836 );
2837 }
2838
2839 #[test]
2840 fn bash_style_assignment() {
2841 assert_eq!(
2843 lex(r#"NAME="value""#),
2844 vec![
2845 Token::Ident("NAME".to_string()),
2846 Token::Eq,
2847 Token::String("value".to_string())
2848 ]
2849 );
2850 }
2851
2852 #[test]
2853 fn positional_params() {
2854 assert_eq!(lex("$0"), vec![Token::Positional(0)]);
2855 assert_eq!(lex("$1"), vec![Token::Positional(1)]);
2856 assert_eq!(lex("$9"), vec![Token::Positional(9)]);
2857 assert_eq!(lex("$@"), vec![Token::AllArgs]);
2858 assert_eq!(lex("$#"), vec![Token::ArgCount]);
2859 }
2860
2861 #[test]
2862 fn positional_in_context() {
2863 assert_eq!(
2864 lex("echo $1 $2"),
2865 vec![
2866 Token::Ident("echo".to_string()),
2867 Token::Positional(1),
2868 Token::Positional(2),
2869 ]
2870 );
2871 }
2872
2873 #[test]
2874 fn var_length() {
2875 assert_eq!(lex("${#X}"), vec![Token::VarLength("X".to_string())]);
2876 assert_eq!(lex("${#NAME}"), vec![Token::VarLength("NAME".to_string())]);
2877 assert_eq!(lex("${#foo_bar}"), vec![Token::VarLength("foo_bar".to_string())]);
2878 }
2879
2880 #[test]
2881 fn var_length_in_context() {
2882 assert_eq!(
2883 lex("echo ${#NAME}"),
2884 vec![
2885 Token::Ident("echo".to_string()),
2886 Token::VarLength("NAME".to_string()),
2887 ]
2888 );
2889 }
2890
2891 #[test]
2896 fn plus_flag() {
2897 assert_eq!(lex("+e"), vec![Token::PlusFlag("e".to_string())]);
2899 assert_eq!(lex("+x"), vec![Token::PlusFlag("x".to_string())]);
2900 assert_eq!(lex("+ex"), vec![Token::PlusFlag("ex".to_string())]);
2901 }
2902
2903 #[test]
2904 fn set_with_plus_flag() {
2905 assert_eq!(
2906 lex("set +e"),
2907 vec![
2908 Token::Set,
2909 Token::PlusFlag("e".to_string()),
2910 ]
2911 );
2912 }
2913
2914 #[test]
2915 fn set_with_multiple_flags() {
2916 assert_eq!(
2917 lex("set -e -u"),
2918 vec![
2919 Token::Set,
2920 Token::ShortFlag("e".to_string()),
2921 Token::ShortFlag("u".to_string()),
2922 ]
2923 );
2924 }
2925
2926 #[test]
2927 fn flags_vs_negative_numbers_edge_cases() {
2928 assert_eq!(
2930 lex("-1 a"),
2931 vec![Token::Int(-1), Token::Ident("a".to_string())]
2932 );
2933 assert_eq!(lex("-l"), vec![Token::ShortFlag("l".to_string())]);
2935 assert_eq!(lex("-123"), vec![Token::Int(-123)]);
2937 }
2938
2939 #[test]
2940 fn single_dash_is_minus_alone() {
2941 let result = tokenize("-").expect("should lex");
2943 assert_eq!(result.len(), 1);
2944 assert!(matches!(result[0].token, Token::MinusAlone));
2945 }
2946
2947 #[test]
2948 fn plus_bare_for_date_format() {
2949 let result = tokenize("+%s").expect("should lex");
2951 assert_eq!(result.len(), 1);
2952 assert!(matches!(result[0].token, Token::PlusBare(ref s) if s == "+%s"));
2953
2954 let result = tokenize("+%Y-%m-%d").expect("should lex");
2956 assert_eq!(result.len(), 1);
2957 assert!(matches!(result[0].token, Token::PlusBare(ref s) if s == "+%Y-%m-%d"));
2958 }
2959
2960 #[test]
2961 fn plus_flag_still_works() {
2962 let result = tokenize("+e").expect("should lex");
2964 assert_eq!(result.len(), 1);
2965 assert!(matches!(result[0].token, Token::PlusFlag(ref s) if s == "e"));
2966 }
2967
2968 #[test]
2969 fn while_keyword_vs_while_loop() {
2970 assert_eq!(lex("while"), vec![Token::While]);
2972 assert_eq!(
2974 lex("while true"),
2975 vec![Token::While, Token::True]
2976 );
2977 }
2978
2979 #[test]
2980 fn control_flow_keywords() {
2981 assert_eq!(lex("break"), vec![Token::Break]);
2982 assert_eq!(lex("continue"), vec![Token::Continue]);
2983 assert_eq!(lex("return"), vec![Token::Return]);
2984 assert_eq!(lex("exit"), vec![Token::Exit]);
2985 }
2986
2987 #[test]
2988 fn control_flow_with_numbers() {
2989 assert_eq!(
2990 lex("break 2"),
2991 vec![Token::Break, Token::Int(2)]
2992 );
2993 assert_eq!(
2994 lex("continue 3"),
2995 vec![Token::Continue, Token::Int(3)]
2996 );
2997 assert_eq!(
2998 lex("exit 1"),
2999 vec![Token::Exit, Token::Int(1)]
3000 );
3001 }
3002
3003 #[test]
3008 fn heredoc_simple() {
3009 let source = "cat <<EOF\nhello\nworld\nEOF";
3010 let tokens = lex(source);
3011 assert_eq!(tokens, vec![
3013 Token::Ident("cat".to_string()),
3014 Token::HereDocStart,
3015 Token::HereDoc(HereDocData {
3016 content: "hello\nworld\n".to_string(),
3017 literal: false,
3018 strip_tabs: false,
3019 body_start_offset: 10,
3020 }),
3021 Token::Newline,
3022 ]);
3023 }
3024
3025 #[test]
3026 fn heredoc_empty() {
3027 let source = "cat <<EOF\nEOF";
3028 let tokens = lex(source);
3029 assert_eq!(tokens, vec![
3030 Token::Ident("cat".to_string()),
3031 Token::HereDocStart,
3032 Token::HereDoc(HereDocData {
3033 content: "".to_string(),
3034 literal: false,
3035 strip_tabs: false,
3036 body_start_offset: 10,
3037 }),
3038 Token::Newline,
3039 ]);
3040 }
3041
3042 #[test]
3043 fn heredoc_with_special_chars() {
3044 let source = "cat <<EOF\n$VAR and \"quoted\" 'single'\nEOF";
3045 let tokens = lex(source);
3046 assert_eq!(tokens, vec![
3047 Token::Ident("cat".to_string()),
3048 Token::HereDocStart,
3049 Token::HereDoc(HereDocData {
3050 content: "$VAR and \"quoted\" 'single'\n".to_string(),
3051 literal: false,
3052 strip_tabs: false,
3053 body_start_offset: 10,
3054 }),
3055 Token::Newline,
3056 ]);
3057 }
3058
3059 #[test]
3060 fn heredoc_multiline() {
3061 let source = "cat <<END\nline1\nline2\nline3\nEND";
3062 let tokens = lex(source);
3063 assert_eq!(tokens, vec![
3064 Token::Ident("cat".to_string()),
3065 Token::HereDocStart,
3066 Token::HereDoc(HereDocData {
3067 content: "line1\nline2\nline3\n".to_string(),
3068 literal: false,
3069 strip_tabs: false,
3070 body_start_offset: 10,
3071 }),
3072 Token::Newline,
3073 ]);
3074 }
3075
3076 #[test]
3077 fn heredoc_in_command() {
3078 let source = "cat <<EOF\nhello\nEOF\necho goodbye";
3079 let tokens = lex(source);
3080 assert_eq!(tokens, vec![
3081 Token::Ident("cat".to_string()),
3082 Token::HereDocStart,
3083 Token::HereDoc(HereDocData {
3084 content: "hello\n".to_string(),
3085 literal: false,
3086 strip_tabs: false,
3087 body_start_offset: 10,
3088 }),
3089 Token::Newline,
3090 Token::Ident("echo".to_string()),
3091 Token::Ident("goodbye".to_string()),
3092 ]);
3093 }
3094
3095 #[test]
3096 fn heredoc_strip_tabs() {
3097 let source = "cat <<-EOF\n\thello\n\tworld\n\tEOF";
3098 let tokens = lex(source);
3099 assert_eq!(tokens, vec![
3103 Token::Ident("cat".to_string()),
3104 Token::HereDocStart,
3105 Token::HereDoc(HereDocData {
3106 content: "\thello\n\tworld\n".to_string(),
3107 literal: false,
3108 strip_tabs: true,
3109 body_start_offset: 11,
3110 }),
3111 Token::Newline,
3112 ]);
3113 }
3114
3115 #[test]
3120 fn arithmetic_simple() {
3121 let source = "$((1 + 2))";
3122 let tokens = lex(source);
3123 assert_eq!(tokens, vec![Token::Arithmetic("1 + 2".to_string())]);
3124 }
3125
3126 #[test]
3127 fn arithmetic_in_assignment() {
3128 let source = "X=$((5 * 3))";
3129 let tokens = lex(source);
3130 assert_eq!(tokens, vec![
3131 Token::Ident("X".to_string()),
3132 Token::Eq,
3133 Token::Arithmetic("5 * 3".to_string()),
3134 ]);
3135 }
3136
3137 #[test]
3138 fn arithmetic_with_nested_parens() {
3139 let source = "$((2 * (3 + 4)))";
3140 let tokens = lex(source);
3141 assert_eq!(tokens, vec![Token::Arithmetic("2 * (3 + 4)".to_string())]);
3142 }
3143
3144 #[test]
3145 fn arithmetic_with_variable() {
3146 let source = "$((X + 1))";
3147 let tokens = lex(source);
3148 assert_eq!(tokens, vec![Token::Arithmetic("X + 1".to_string())]);
3149 }
3150
3151 #[test]
3152 fn arithmetic_command_subst_not_confused() {
3153 let source = "$(echo hello)";
3155 let tokens = lex(source);
3156 assert_eq!(tokens, vec![
3157 Token::CmdSubstStart,
3158 Token::Ident("echo".to_string()),
3159 Token::Ident("hello".to_string()),
3160 Token::RParen,
3161 ]);
3162 }
3163
3164 #[test]
3165 fn arithmetic_nesting_limit() {
3166 let open_parens = "(".repeat(300);
3168 let close_parens = ")".repeat(300);
3169 let source = format!("$(({}1{}))", open_parens, close_parens);
3170 let result = tokenize(&source);
3171 assert!(result.is_err());
3172 let errors = result.unwrap_err();
3173 assert_eq!(errors.len(), 1);
3174 assert_eq!(errors[0].token, LexerError::NestingTooDeep);
3175 }
3176
3177 #[test]
3178 fn arithmetic_nesting_within_limit() {
3179 let source = "$((((1 + 2) * 3)))";
3181 let tokens = lex(source);
3182 assert_eq!(tokens, vec![Token::Arithmetic("((1 + 2) * 3)".to_string())]);
3183 }
3184
3185 #[test]
3197 fn arithmetic_after_apostrophe_in_comment() {
3198 let source = "# this doesn't work\necho $((1+2))";
3201 let tokens = lex(source);
3202 assert_eq!(tokens, vec![
3203 Token::Newline,
3204 Token::Ident("echo".to_string()),
3205 Token::Arithmetic("1+2".to_string()),
3206 ]);
3207 }
3208
3209 #[test]
3210 fn arithmetic_inside_comment_is_not_expanded() {
3211 let source = "# the $((y)) syntax explained\necho hello";
3213 let tokens = lex(source);
3214 assert_eq!(tokens, vec![
3215 Token::Newline,
3216 Token::Ident("echo".to_string()),
3217 Token::Ident("hello".to_string()),
3218 ]);
3219 }
3220
3221 #[test]
3222 fn backticked_arithmetic_in_comment_is_not_expanded() {
3223 let source = "# the `$((x))` syntax explained\necho $((3+4))";
3227 let tokens = lex(source);
3228 assert_eq!(tokens, vec![
3229 Token::Newline,
3230 Token::Ident("echo".to_string()),
3231 Token::Arithmetic("3+4".to_string()),
3232 ]);
3233 }
3234
3235 #[test]
3236 fn arithmetic_still_works_outside_comments() {
3237 let source = "X=$((1+2)); Y=$((3*4))";
3240 let tokens = lex(source);
3241 assert_eq!(tokens, vec![
3242 Token::Ident("X".to_string()),
3243 Token::Eq,
3244 Token::Arithmetic("1+2".to_string()),
3245 Token::Semi,
3246 Token::Ident("Y".to_string()),
3247 Token::Eq,
3248 Token::Arithmetic("3*4".to_string()),
3249 ]);
3250 }
3251
3252 #[test]
3253 fn arithmetic_inside_double_quotes_still_expands() {
3254 let source = "echo \"# $((1+2))\"";
3257 let tokens = lex(source);
3258 assert_eq!(tokens.len(), 2);
3263 assert!(matches!(tokens[0], Token::Ident(_)));
3264 assert!(matches!(tokens[1], Token::String(_)));
3265 }
3266
3267 #[test]
3280 fn backtick_in_source_is_rejected() {
3281 let result = tokenize("echo `date`");
3282 assert!(result.is_err());
3283 let errors = result.unwrap_err();
3284 assert!(errors.iter().any(|e| e.token == LexerError::BackticksNotSupported));
3285 }
3286
3287 #[test]
3288 fn backtick_in_comment_is_just_comment_text() {
3289 let source = "# use `date` here\necho hi";
3292 let tokens = lex(source);
3293 assert_eq!(tokens, vec![
3294 Token::Newline,
3295 Token::Ident("echo".to_string()),
3296 Token::Ident("hi".to_string()),
3297 ]);
3298 }
3299
3300 #[test]
3301 fn backtick_in_single_quoted_string_is_literal() {
3302 let source = "echo '`date`'";
3305 let tokens = lex(source);
3306 assert_eq!(tokens, vec![
3307 Token::Ident("echo".to_string()),
3308 Token::SingleString("`date`".to_string()),
3309 ]);
3310 }
3311
3312 #[test]
3313 fn backtick_in_double_quoted_string_is_literal() {
3314 let source = "echo \"`date`\"";
3319 let tokens = lex(source);
3320 assert_eq!(tokens.len(), 2);
3321 assert!(matches!(tokens[0], Token::Ident(_)));
3322 match &tokens[1] {
3323 Token::String(s) => assert!(s.contains('`')),
3324 other => panic!("expected Token::String, got {:?}", other),
3325 }
3326 }
3327
3328 #[test]
3329 fn backtick_in_heredoc_body_is_preserved() {
3330 let source = "cat <<EOF\n`date`\nEOF\n";
3333 let tokens = lex(source);
3334 let heredoc = tokens.iter().find(|t| matches!(t, Token::HereDoc(_)));
3335 assert!(heredoc.is_some(), "expected a HereDoc token");
3336 if let Some(Token::HereDoc(d)) = heredoc {
3337 assert!(d.content.contains('`'));
3338 }
3339 }
3340
3341 #[test]
3346 fn token_categories() {
3347 assert_eq!(Token::If.category(), TokenCategory::Keyword);
3349 assert_eq!(Token::Then.category(), TokenCategory::Keyword);
3350 assert_eq!(Token::For.category(), TokenCategory::Keyword);
3351 assert_eq!(Token::Function.category(), TokenCategory::Keyword);
3352 assert_eq!(Token::True.category(), TokenCategory::Keyword);
3353 assert_eq!(Token::TypeString.category(), TokenCategory::Keyword);
3354
3355 assert_eq!(Token::Pipe.category(), TokenCategory::Operator);
3357 assert_eq!(Token::And.category(), TokenCategory::Operator);
3358 assert_eq!(Token::Or.category(), TokenCategory::Operator);
3359 assert_eq!(Token::StderrToStdout.category(), TokenCategory::Operator);
3360 assert_eq!(Token::GtGt.category(), TokenCategory::Operator);
3361
3362 assert_eq!(Token::String("test".to_string()).category(), TokenCategory::String);
3364 assert_eq!(Token::SingleString("test".to_string()).category(), TokenCategory::String);
3365 assert_eq!(
3366 Token::HereDoc(HereDocData {
3367 content: "test".to_string(),
3368 literal: false,
3369 strip_tabs: false,
3370 body_start_offset: 0,
3371 }).category(),
3372 TokenCategory::String,
3373 );
3374
3375 assert_eq!(Token::Int(42).category(), TokenCategory::Number);
3377 assert_eq!(Token::Float(3.14).category(), TokenCategory::Number);
3378 assert_eq!(Token::Arithmetic("1+2".to_string()).category(), TokenCategory::Number);
3379
3380 assert_eq!(Token::SimpleVarRef("X".to_string()).category(), TokenCategory::Variable);
3382 assert_eq!(Token::VarRef("${X}".to_string()).category(), TokenCategory::Variable);
3383 assert_eq!(Token::Positional(1).category(), TokenCategory::Variable);
3384 assert_eq!(Token::AllArgs.category(), TokenCategory::Variable);
3385 assert_eq!(Token::ArgCount.category(), TokenCategory::Variable);
3386 assert_eq!(Token::LastExitCode.category(), TokenCategory::Variable);
3387 assert_eq!(Token::CurrentPid.category(), TokenCategory::Variable);
3388
3389 assert_eq!(Token::ShortFlag("l".to_string()).category(), TokenCategory::Flag);
3391 assert_eq!(Token::LongFlag("verbose".to_string()).category(), TokenCategory::Flag);
3392 assert_eq!(Token::PlusFlag("e".to_string()).category(), TokenCategory::Flag);
3393 assert_eq!(Token::DoubleDash.category(), TokenCategory::Flag);
3394
3395 assert_eq!(Token::Semi.category(), TokenCategory::Punctuation);
3397 assert_eq!(Token::LParen.category(), TokenCategory::Punctuation);
3398 assert_eq!(Token::LBracket.category(), TokenCategory::Punctuation);
3399 assert_eq!(Token::Newline.category(), TokenCategory::Punctuation);
3400
3401 assert_eq!(Token::Comment.category(), TokenCategory::Comment);
3403
3404 assert_eq!(Token::Path("/tmp/file".to_string()).category(), TokenCategory::Path);
3406
3407 assert_eq!(Token::Ident("echo".to_string()).category(), TokenCategory::Command);
3409 assert_eq!(Token::NumberIdent("019dda1c".to_string()).category(), TokenCategory::Command);
3410 assert_eq!(Token::DottedIdent(".gitignore".to_string()).category(), TokenCategory::Command);
3411
3412 assert_eq!(Token::InvalidFloatNoLeading.category(), TokenCategory::Error);
3414 assert_eq!(Token::InvalidFloatNoTrailing.category(), TokenCategory::Error);
3415 }
3416
3417 #[test]
3418 fn test_heredoc_piped_to_command() {
3419 let tokens = tokenize("cat <<EOF | jq\n{\"key\": \"val\"}\nEOF").unwrap();
3422 let heredoc_pos = tokens.iter().position(|t| matches!(t.token, Token::HereDoc(_)));
3423 let pipe_pos = tokens.iter().position(|t| matches!(t.token, Token::Pipe));
3424 assert!(heredoc_pos.is_some(), "should have a heredoc token");
3425 assert!(pipe_pos.is_some(), "should have a pipe token");
3426 assert!(
3427 pipe_pos.unwrap() > heredoc_pos.unwrap(),
3428 "Pipe must come after heredoc, got heredoc at {}, pipe at {}. Tokens: {:?}",
3429 heredoc_pos.unwrap(), pipe_pos.unwrap(), tokens,
3430 );
3431 }
3432
3433 #[test]
3434 fn test_heredoc_standalone_still_works() {
3435 let tokens = tokenize("cat <<EOF\nhello\nEOF").unwrap();
3437 assert!(tokens.iter().any(|t| matches!(t.token, Token::HereDoc(_))));
3438 assert!(!tokens.iter().any(|t| matches!(t.token, Token::Pipe)));
3439 }
3440
3441 #[test]
3442 fn test_heredoc_preserves_leading_empty_lines() {
3443 let tokens = tokenize("cat <<EOF\n\nhello\nEOF").unwrap();
3445 let heredoc = tokens.iter().find_map(|t| {
3446 if let Token::HereDoc(data) = &t.token {
3447 Some(data.clone())
3448 } else {
3449 None
3450 }
3451 });
3452 assert!(heredoc.is_some(), "should have a heredoc token");
3453 let data = heredoc.unwrap();
3454 assert!(data.content.starts_with('\n'), "leading empty line must be preserved, got: {:?}", data.content);
3455 assert_eq!(data.content, "\nhello\n");
3456 }
3457
3458 #[test]
3459 fn test_heredoc_quoted_delimiter_sets_literal() {
3460 let tokens = tokenize("cat <<'EOF'\nhello $HOME\nEOF").unwrap();
3462 let heredoc = tokens.iter().find_map(|t| {
3463 if let Token::HereDoc(data) = &t.token {
3464 Some(data.clone())
3465 } else {
3466 None
3467 }
3468 });
3469 assert!(heredoc.is_some(), "should have a heredoc token");
3470 let data = heredoc.unwrap();
3471 assert!(data.literal, "quoted delimiter should set literal=true");
3472 assert_eq!(data.content, "hello $HOME\n");
3473 }
3474
3475 #[test]
3476 fn test_heredoc_unquoted_delimiter_not_literal() {
3477 let tokens = tokenize("cat <<EOF\nhello $HOME\nEOF").unwrap();
3479 let heredoc = tokens.iter().find_map(|t| {
3480 if let Token::HereDoc(data) = &t.token {
3481 Some(data.clone())
3482 } else {
3483 None
3484 }
3485 });
3486 assert!(heredoc.is_some(), "should have a heredoc token");
3487 let data = heredoc.unwrap();
3488 assert!(!data.literal, "unquoted delimiter should have literal=false");
3489 }
3490
3491 #[test]
3496 fn colon_double_in_word() {
3497 assert_eq!(lex("foo::bar"), vec![Token::Ident("foo::bar".into())]);
3498 }
3499
3500 #[test]
3501 fn colon_single_in_word() {
3502 assert_eq!(lex("a:b:c"), vec![Token::Ident("a:b:c".into())]);
3503 }
3504
3505 #[test]
3506 fn colon_with_port() {
3507 assert_eq!(lex("host:8080"), vec![Token::Ident("host:8080".into())]);
3508 }
3509
3510 #[test]
3511 fn colon_standalone() {
3512 assert_eq!(lex(":"), vec![Token::Colon]);
3513 }
3514
3515 #[test]
3516 fn colon_spaced_no_merge() {
3517 assert_eq!(
3518 lex("foo : bar"),
3519 vec![
3520 Token::Ident("foo".into()),
3521 Token::Colon,
3522 Token::Ident("bar".into()),
3523 ]
3524 );
3525 }
3526
3527 #[test]
3528 fn colon_in_command_arg() {
3529 assert_eq!(
3530 lex("echo foo::bar"),
3531 vec![
3532 Token::Ident("echo".into()),
3533 Token::Ident("foo::bar".into()),
3534 ]
3535 );
3536 }
3537
3538 #[test]
3539 fn colon_trailing() {
3540 assert_eq!(lex("foo:"), vec![Token::Ident("foo:".into())]);
3542 }
3543
3544 #[test]
3545 fn colon_leading() {
3546 assert_eq!(lex(":foo"), vec![Token::Ident(":foo".into())]);
3548 }
3549
3550 #[test]
3551 fn colon_with_path() {
3552 assert_eq!(
3554 lex("/usr/bin:8080"),
3555 vec![Token::Ident("/usr/bin:8080".into())]
3556 );
3557 }
3558
3559 #[test]
3564 fn is_keyword_covers_control_flow() {
3565 for t in [
3566 Token::While,
3567 Token::Return,
3568 Token::Break,
3569 Token::Continue,
3570 Token::Exit,
3571 ] {
3572 assert!(t.is_keyword(), "{t:?} should be a keyword");
3573 }
3574 }
3575
3576 #[test]
3577 fn starts_statement_covers_while() {
3578 assert!(Token::While.starts_statement());
3579 }
3580
3581 #[test]
3582 fn is_keyword_rejects_operators() {
3583 for t in [Token::Pipe, Token::Amp, Token::Eq, Token::LBrace] {
3584 assert!(!t.is_keyword(), "{t:?} should not be a keyword");
3585 }
3586 }
3587}