1use logos::{Logos, Span};
17use std::fmt;
18use std::sync::atomic::{AtomicU64, Ordering};
19use std::time::{SystemTime, UNIX_EPOCH};
20
/// Process-wide counter folded into every generated marker id, so that two
/// ids produced within the same clock reading still differ.
static MARKER_COUNTER: AtomicU64 = AtomicU64::new(0);

/// Maximum parenthesis nesting accepted inside a `$(( ... ))` expression;
/// exceeding it is reported as `LexerError::NestingTooDeep`.
const MAX_PAREN_DEPTH: usize = 256;
27
28#[derive(Debug, Clone)]
32struct SpanReplacement {
33 preprocessed_pos: usize,
35 marker_len: usize,
37 original_len: usize,
39}
40
41fn correct_span(span: Span, replacements: &[SpanReplacement]) -> Span {
43 let mut start_adjustment: isize = 0;
44 let mut end_adjustment: isize = 0;
45
46 for r in replacements {
47 let delta = r.original_len as isize - r.marker_len as isize;
49
50 if span.start > r.preprocessed_pos + r.marker_len {
52 start_adjustment += delta;
53 } else if span.start > r.preprocessed_pos {
54 start_adjustment += delta;
57 }
58
59 if span.end > r.preprocessed_pos + r.marker_len {
61 end_adjustment += delta;
62 } else if span.end > r.preprocessed_pos {
63 end_adjustment += delta;
65 }
66 }
67
68 let new_start = (span.start as isize + start_adjustment).max(0) as usize;
69 let new_end = (span.end as isize + end_adjustment).max(new_start as isize) as usize;
70 new_start..new_end
71}
72
73fn unique_marker_id() -> String {
76 let timestamp = SystemTime::now()
77 .duration_since(UNIX_EPOCH)
78 .map(|d| d.as_nanos())
79 .unwrap_or(0);
80 let counter = MARKER_COUNTER.fetch_add(1, Ordering::Relaxed);
81 #[cfg(target_os = "wasi")]
82 let pid = 0u32;
83 #[cfg(not(target_os = "wasi"))]
84 let pid = std::process::id();
85 format!("{:x}_{:x}_{:x}", timestamp, counter, pid)
86}
87
88#[derive(Debug, Clone, PartialEq)]
90pub struct Spanned<T> {
91 pub token: T,
92 pub span: Span,
93}
94
95impl<T> Spanned<T> {
96 pub fn new(token: T, span: Span) -> Self {
97 Self { token, span }
98 }
99}
100
/// Errors reported while tokenizing; also used as the logos error type for
/// `Token`. The default value is `UnexpectedCharacter`.
#[derive(Debug, Clone, PartialEq, Default)]
pub enum LexerError {
    /// Default error value; displayed as "unexpected character".
    #[default]
    UnexpectedCharacter,
    /// Displayed as "unterminated string".
    UnterminatedString,
    /// Displayed as "unterminated variable reference".
    UnterminatedVarRef,
    /// Displayed as "invalid escape sequence".
    InvalidEscape,
    /// A numeric literal failed to parse (see `lex_int` / `lex_float`).
    InvalidNumber,
    /// `true`/`false` written with non-lowercase letters; carries the original spelling.
    AmbiguousBoolean(String),
    /// `yes`/`no` in any letter case; carries the original spelling.
    AmbiguousBooleanLike(String),
    /// A float written without a leading digit, e.g. `.5`.
    InvalidFloatNoLeading,
    /// A float written without a trailing digit, e.g. `5.`.
    InvalidFloatNoTrailing,
    /// Parenthesis nesting inside `$(( ... ))` exceeded `MAX_PAREN_DEPTH`.
    NestingTooDeep,
    /// A heredoc body reached end of input before its closing delimiter line.
    UnterminatedHeredoc { delimiter: String },
    /// A backtick was encountered; the message points users at `$(cmd)`.
    BackticksNotSupported,
}
126
127impl fmt::Display for LexerError {
128 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129 match self {
130 LexerError::UnexpectedCharacter => write!(f, "unexpected character"),
131 LexerError::UnterminatedString => write!(f, "unterminated string"),
132 LexerError::UnterminatedVarRef => write!(f, "unterminated variable reference"),
133 LexerError::InvalidEscape => write!(f, "invalid escape sequence"),
134 LexerError::InvalidNumber => write!(f, "invalid number"),
135 LexerError::AmbiguousBoolean(s) => {
136 write!(f, "ambiguous boolean, use lowercase '{}'", s.to_lowercase())
137 }
138 LexerError::AmbiguousBooleanLike(s) => {
139 let suggest = if s.eq_ignore_ascii_case("yes") { "true" } else { "false" };
140 write!(f, "ambiguous boolean-like '{}', use '{}' or '\"{}\"'", s, suggest, s)
141 }
142 LexerError::InvalidFloatNoLeading => write!(f, "float must have leading digit"),
143 LexerError::InvalidFloatNoTrailing => write!(f, "float must have trailing digit"),
144 LexerError::NestingTooDeep => write!(f, "nesting depth exceeded (max {})", MAX_PAREN_DEPTH),
145 LexerError::UnterminatedHeredoc { delimiter } => {
146 write!(f, "unterminated heredoc, expected closing delimiter `{}` on its own line", delimiter)
147 }
148 LexerError::BackticksNotSupported => {
149 write!(f, "backticks are not supported in kaish; use $(cmd) instead")
150 }
151 }
152 }
153}
154
/// Payload carried by `Token::HereDoc`.
///
/// NOTE(review): the fields mirror `HeredocReplacement` (body/literal/
/// strip_tabs/body_start_offset); the construction site is not visible here,
/// so confirm the exact meaning of each field against it.
#[derive(Debug, Clone, PartialEq)]
pub struct HereDocData {
    /// Heredoc body text.
    pub content: String,
    /// Presumably true when the delimiter was quoted — TODO confirm at the construction site.
    pub literal: bool,
    /// Presumably true for the `<<-` form — TODO confirm at the construction site.
    pub strip_tabs: bool,
    /// Presumably the offset in the original source where the body begins — TODO confirm.
    pub body_start_offset: usize,
}
184
/// Token kinds recognized by the lexer.
///
/// Patterns are declared through logos attributes; runs of spaces and tabs
/// are skipped, and lexing failures are reported as `LexerError`. Variants
/// without a `#[token]`/`#[regex]` attribute (`GlobWord`, `Arithmetic`,
/// `HereDoc`) are never matched by logos itself.
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(error = LexerError)]
#[logos(skip r"[ \t]+")]
pub enum Token {
    // ---- Keywords ----
    #[token("set")]
    Set,

    #[token("local")]
    Local,

    #[token("if")]
    If,

    #[token("then")]
    Then,

    #[token("else")]
    Else,

    #[token("elif")]
    Elif,

    #[token("fi")]
    Fi,

    #[token("for")]
    For,

    #[token("while")]
    While,

    #[token("in")]
    In,

    #[token("do")]
    Do,

    #[token("done")]
    Done,

    #[token("case")]
    Case,

    #[token("esac")]
    Esac,

    #[token("function")]
    Function,

    #[token("break")]
    Break,

    #[token("continue")]
    Continue,

    #[token("return")]
    Return,

    #[token("exit")]
    Exit,

    #[token("true")]
    True,

    #[token("false")]
    False,

    // ---- Type keywords ----
    #[token("string")]
    TypeString,

    #[token("int")]
    TypeInt,

    #[token("float")]
    TypeFloat,

    #[token("bool")]
    TypeBool,

    // ---- Multi-character operators ----
    #[token("&&")]
    And,

    #[token("||")]
    Or,

    #[token("==")]
    EqEq,

    #[token("!=")]
    NotEq,

    #[token("=~")]
    Match,

    #[token("!~")]
    NotMatch,

    #[token(">=")]
    GtEq,

    #[token("<=")]
    LtEq,

    #[token(">>")]
    GtGt,

    #[token("2>&1")]
    StderrToStdout,

    #[token("1>&2")]
    StdoutToStderr,

    #[token(">&2")]
    StdoutToStderr2,

    #[token("2>")]
    Stderr,

    #[token("&>")]
    Both,

    #[token("<<<")]
    HereString,

    #[token("<<")]
    HereDocStart,

    #[token(";;")]
    DoubleSemi,

    // ---- Single-character operators and punctuation ----
    #[token("=")]
    Eq,

    #[token("|")]
    Pipe,

    #[token("&")]
    Amp,

    #[token(">")]
    Gt,

    #[token("<")]
    Lt,

    #[token(";")]
    Semi,

    #[token(":")]
    Colon,

    #[token(",")]
    Comma,

    #[token("..")]
    DotDot,

    #[token(".")]
    Dot,

    /// `~` followed by path characters; carries the full matched text.
    #[regex(r"~[a-zA-Z0-9_./+-]+", lex_tilde_path, priority = 3)]
    TildePath(String),

    /// A bare `~`.
    #[token("~")]
    Tilde,

    /// A path starting with `../`, or a word containing `/`; carries the full matched text.
    #[regex(r"\.\./[a-zA-Z0-9_./-]+", lex_relative_path, priority = 3)]
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_.-]*/[a-zA-Z0-9_./-]*", lex_relative_path, priority = 3)]
    RelativePath(String),

    /// A path starting with `./`; carries the full matched text.
    #[regex(r"\./[a-zA-Z0-9_./-]+", lex_dot_slash_path, priority = 3)]
    DotSlashPath(String),

    /// A word starting with `.` (e.g. a dotfile name); carries the full matched text.
    #[regex(r"\.[a-zA-Z_][a-zA-Z0-9_.-]*", lex_dotted_ident, priority = 3)]
    DottedIdent(String),

    #[token("{")]
    LBrace,

    #[token("}")]
    RBrace,

    #[token("[")]
    LBracket,

    #[token("]")]
    RBracket,

    #[token("(")]
    LParen,

    #[token(")")]
    RParen,

    #[token("*")]
    Star,

    #[token("!")]
    Bang,

    #[token("?")]
    Question,

    /// Has no lexer pattern. NOTE(review): presumably produced by the glob
    /// merge pass — confirm against `flush_glob_run`.
    GlobWord(String),

    /// Has no lexer pattern. NOTE(review): presumably substituted for the
    /// markers emitted by `preprocess_arithmetic` — confirm at the tokenizer entry point.
    Arithmetic(String),

    /// Opening `$(` of a command substitution.
    #[token("$(")]
    CmdSubstStart,

    /// `--name`; carries the name without the leading dashes.
    #[regex(r"--[a-zA-Z][a-zA-Z0-9-]*", lex_long_flag, priority = 3)]
    LongFlag(String),

    /// `-x`; carries the text after the dash.
    #[regex(r"-[a-zA-Z][a-zA-Z0-9]*", lex_short_flag, priority = 3)]
    ShortFlag(String),

    /// `+x`; carries the text after the plus sign.
    #[regex(r"\+[a-zA-Z][a-zA-Z0-9]*", lex_plus_flag, priority = 3)]
    PlusFlag(String),

    /// A bare `--`.
    #[token("--")]
    DoubleDash,

    /// `+` followed by a non-letter word; carries the full matched text.
    #[regex(r"\+[^a-zA-Z\s][^\s]*", lex_plus_bare, priority = 2)]
    PlusBare(String),

    /// `-` followed by a word not starting with a letter, digit or dash;
    /// carries the full matched text.
    #[regex(r"-[^a-zA-Z0-9\s\-][^\s]*", lex_minus_bare, priority = 1)]
    MinusBare(String),

    /// A bare `-`.
    #[token("-")]
    MinusAlone,

    /// Double-quoted string; the payload is produced by `parse_string_literal`.
    #[regex(r#""([^"\\]|\\.)*""#, lex_string)]
    String(String),

    /// Single-quoted string; carries the text between the quotes.
    #[regex(r"'[^']*'", lex_single_string)]
    SingleString(String),

    /// `${...}`; carries the full matched text including `${` and `}`.
    #[regex(r"\$\{[^}]+\}", lex_varref)]
    VarRef(String),

    /// `$name`; carries the name without the `$`.
    #[regex(r"\$[a-zA-Z_][a-zA-Z0-9_]*", lex_simple_varref)]
    SimpleVarRef(String),

    /// `$0`..`$9`; carries the digit as a number.
    #[regex(r"\$[0-9]", lex_positional)]
    Positional(usize),

    #[token("$@")]
    AllArgs,

    #[token("$#")]
    ArgCount,

    #[token("$?")]
    LastExitCode,

    #[token("$$")]
    CurrentPid,

    /// `${#name}`; carries the variable name only.
    #[regex(r"\$\{#[a-zA-Z_][a-zA-Z0-9_]*\}", lex_var_length)]
    VarLength(String),

    /// Has no lexer pattern. NOTE(review): presumably substituted for the
    /// markers emitted by `preprocess_heredocs` — confirm at the tokenizer entry point.
    HereDoc(HereDocData),

    /// Integer literal with optional leading `-`.
    #[regex(r"-?[0-9]+", lex_int, priority = 2)]
    Int(i64),

    /// Float literal with digits on both sides of the dot.
    #[regex(r"-?[0-9]+\.[0-9]+", lex_float)]
    Float(f64),

    /// Digits immediately followed by identifier characters; carries the full matched text.
    #[regex(r"[0-9]+[a-zA-Z_][a-zA-Z0-9_.-]*", lex_number_ident, priority = 3)]
    NumberIdent(String),

    /// Matches `.5`-style input; the callback always fails with
    /// `LexerError::InvalidFloatNoLeading`.
    #[regex(r"\.[0-9]+", lex_invalid_float_no_leading, priority = 3)]
    InvalidFloatNoLeading,

    /// Matches `5.`-style input; the callback always fails with
    /// `LexerError::InvalidFloatNoTrailing`.
    #[regex(r"[0-9]+\.", lex_invalid_float_no_trailing, priority = 2)]
    InvalidFloatNoTrailing,

    /// Text starting with `/`; carries the full matched text.
    #[regex(r"/[a-zA-Z0-9_./+-]*", lex_path)]
    Path(String),

    /// Plain word; `lex_ident` rejects ambiguous boolean spellings.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_.-]*", lex_ident)]
    Ident(String),

    /// `#` through the end of the line (the line terminator is not included).
    #[regex(r"#[^\n\r]*", allow_greedy = true)]
    Comment,

    /// `\n` or `\r\n`.
    #[regex(r"\n|\r\n")]
    Newline,

    /// Backslash, optional spaces/tabs, then a newline.
    #[regex(r"\\[ \t]*(\n|\r\n)")]
    LineContinuation,

    /// Matches a backtick; the callback always fails with
    /// `LexerError::BackticksNotSupported`.
    #[token("`", reject_backtick)]
    BacktickRejected,
}
586
/// Coarse grouping of tokens, as returned by `Token::category`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenCategory {
    /// Control-flow words, `set`/`local`, boolean literals and type names.
    Keyword,
    /// Pipes, logical/comparison operators and redirections.
    Operator,
    /// Quoted strings and heredocs.
    String,
    /// Integer, float and arithmetic tokens.
    Number,
    /// Variable references and special parameters.
    Variable,
    /// Comments.
    Comment,
    /// Separators, brackets, newlines and similar structural tokens.
    Punctuation,
    /// Plain words, including bare `+`/`-` words and dotted/number-prefixed identifiers.
    Command,
    /// Paths, tildes, `..` and glob words.
    Path,
    /// Long, short and plus flags, and `--`.
    Flag,
    /// Tokens that only exist to report invalid input.
    Error,
}
616
617impl Token {
618 pub fn category(&self) -> TokenCategory {
620 match self {
621 Token::If
623 | Token::Then
624 | Token::Else
625 | Token::Elif
626 | Token::Fi
627 | Token::For
628 | Token::In
629 | Token::Do
630 | Token::Done
631 | Token::While
632 | Token::Case
633 | Token::Esac
634 | Token::Function
635 | Token::Return
636 | Token::Break
637 | Token::Continue
638 | Token::Exit
639 | Token::Set
640 | Token::Local
641 | Token::True
642 | Token::False
643 | Token::TypeString
644 | Token::TypeInt
645 | Token::TypeFloat
646 | Token::TypeBool => TokenCategory::Keyword,
647
648 Token::Pipe
650 | Token::And
651 | Token::Or
652 | Token::Amp
653 | Token::Eq
654 | Token::EqEq
655 | Token::NotEq
656 | Token::Match
657 | Token::NotMatch
658 | Token::Lt
659 | Token::Gt
660 | Token::LtEq
661 | Token::GtEq
662 | Token::GtGt
663 | Token::Stderr
664 | Token::Both
665 | Token::HereDocStart
666 | Token::HereString
667 | Token::StderrToStdout
668 | Token::StdoutToStderr
669 | Token::StdoutToStderr2 => TokenCategory::Operator,
670
671 Token::String(_) | Token::SingleString(_) | Token::HereDoc(_) => TokenCategory::String,
673
674 Token::Int(_) | Token::Float(_) | Token::Arithmetic(_) => TokenCategory::Number,
676
677 Token::VarRef(_)
679 | Token::SimpleVarRef(_)
680 | Token::Positional(_)
681 | Token::AllArgs
682 | Token::ArgCount
683 | Token::VarLength(_)
684 | Token::LastExitCode
685 | Token::CurrentPid => TokenCategory::Variable,
686
687 Token::LongFlag(_)
689 | Token::ShortFlag(_)
690 | Token::PlusFlag(_)
691 | Token::DoubleDash => TokenCategory::Flag,
692
693 Token::Semi
695 | Token::DoubleSemi
696 | Token::Colon
697 | Token::Comma
698 | Token::Dot
699 | Token::LParen
700 | Token::RParen
701 | Token::LBrace
702 | Token::RBrace
703 | Token::LBracket
704 | Token::RBracket
705 | Token::Bang
706 | Token::Question
707 | Token::Star
708 | Token::Newline
709 | Token::LineContinuation
710 | Token::CmdSubstStart => TokenCategory::Punctuation,
711
712 Token::GlobWord(_) => TokenCategory::Path,
714
715 Token::Comment => TokenCategory::Comment,
717
718 Token::Path(_)
720 | Token::TildePath(_)
721 | Token::RelativePath(_)
722 | Token::Tilde
723 | Token::DotDot
724 | Token::DotSlashPath(_) => TokenCategory::Path,
725
726 Token::Ident(_)
728 | Token::PlusBare(_)
729 | Token::MinusBare(_)
730 | Token::MinusAlone
731 | Token::NumberIdent(_)
732 | Token::DottedIdent(_) => TokenCategory::Command,
733
734 Token::InvalidFloatNoLeading
736 | Token::InvalidFloatNoTrailing
737 | Token::BacktickRejected => TokenCategory::Error,
738 }
739 }
740}
741
742fn lex_string(lex: &mut logos::Lexer<Token>) -> Result<String, LexerError> {
744 parse_string_literal(lex.slice())
745}
746
747fn lex_single_string(lex: &mut logos::Lexer<Token>) -> String {
749 let s = lex.slice();
750 s[1..s.len() - 1].to_string()
752}
753
754fn lex_varref(lex: &mut logos::Lexer<Token>) -> String {
756 lex.slice().to_string()
758}
759
760fn lex_simple_varref(lex: &mut logos::Lexer<Token>) -> String {
762 lex.slice()[1..].to_string()
764}
765
766fn lex_positional(lex: &mut logos::Lexer<Token>) -> usize {
768 lex.slice()[1..].parse().unwrap_or(0)
770}
771
772fn lex_var_length(lex: &mut logos::Lexer<Token>) -> String {
774 let s = lex.slice();
776 s[3..s.len() - 1].to_string()
777}
778
779fn lex_int(lex: &mut logos::Lexer<Token>) -> Result<i64, LexerError> {
781 lex.slice().parse().map_err(|_| LexerError::InvalidNumber)
782}
783
784fn lex_float(lex: &mut logos::Lexer<Token>) -> Result<f64, LexerError> {
786 lex.slice().parse().map_err(|_| LexerError::InvalidNumber)
787}
788
789fn lex_number_ident(lex: &mut logos::Lexer<Token>) -> String {
793 lex.slice().to_string()
794}
795
796fn lex_dotted_ident(lex: &mut logos::Lexer<Token>) -> String {
798 lex.slice().to_string()
799}
800
801fn lex_invalid_float_no_leading(_lex: &mut logos::Lexer<Token>) -> Result<(), LexerError> {
804 Err(LexerError::InvalidFloatNoLeading)
805}
806
807fn reject_backtick(_lex: &mut logos::Lexer<Token>) -> Result<(), LexerError> {
811 Err(LexerError::BackticksNotSupported)
812}
813
814fn lex_invalid_float_no_trailing(_lex: &mut logos::Lexer<Token>) -> Result<(), LexerError> {
817 Err(LexerError::InvalidFloatNoTrailing)
818}
819
820fn lex_ident(lex: &mut logos::Lexer<Token>) -> Result<String, LexerError> {
822 let s = lex.slice();
823
824 match s.to_lowercase().as_str() {
827 "true" | "false" if s != "true" && s != "false" => {
828 return Err(LexerError::AmbiguousBoolean(s.to_string()));
829 }
830 _ => {}
831 }
832
833 if s.eq_ignore_ascii_case("yes") || s.eq_ignore_ascii_case("no") {
835 return Err(LexerError::AmbiguousBooleanLike(s.to_string()));
836 }
837
838 Ok(s.to_string())
839}
840
841fn lex_long_flag(lex: &mut logos::Lexer<Token>) -> String {
843 lex.slice()[2..].to_string()
845}
846
847fn lex_short_flag(lex: &mut logos::Lexer<Token>) -> String {
849 lex.slice()[1..].to_string()
851}
852
853fn lex_plus_flag(lex: &mut logos::Lexer<Token>) -> String {
855 lex.slice()[1..].to_string()
857}
858
859fn lex_plus_bare(lex: &mut logos::Lexer<Token>) -> String {
861 lex.slice().to_string()
862}
863
864fn lex_minus_bare(lex: &mut logos::Lexer<Token>) -> String {
866 lex.slice().to_string()
867}
868
869fn lex_path(lex: &mut logos::Lexer<Token>) -> String {
871 lex.slice().to_string()
872}
873
874fn lex_tilde_path(lex: &mut logos::Lexer<Token>) -> String {
876 lex.slice().to_string()
877}
878
879fn lex_relative_path(lex: &mut logos::Lexer<Token>) -> String {
881 lex.slice().to_string()
882}
883
884fn lex_dot_slash_path(lex: &mut logos::Lexer<Token>) -> String {
886 lex.slice().to_string()
887}
888
889impl fmt::Display for Token {
890 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
891 match self {
892 Token::Set => write!(f, "set"),
893 Token::Local => write!(f, "local"),
894 Token::If => write!(f, "if"),
895 Token::Then => write!(f, "then"),
896 Token::Else => write!(f, "else"),
897 Token::Elif => write!(f, "elif"),
898 Token::Fi => write!(f, "fi"),
899 Token::For => write!(f, "for"),
900 Token::While => write!(f, "while"),
901 Token::In => write!(f, "in"),
902 Token::Do => write!(f, "do"),
903 Token::Done => write!(f, "done"),
904 Token::Case => write!(f, "case"),
905 Token::Esac => write!(f, "esac"),
906 Token::Function => write!(f, "function"),
907 Token::Break => write!(f, "break"),
908 Token::Continue => write!(f, "continue"),
909 Token::Return => write!(f, "return"),
910 Token::Exit => write!(f, "exit"),
911 Token::True => write!(f, "true"),
912 Token::False => write!(f, "false"),
913 Token::TypeString => write!(f, "string"),
914 Token::TypeInt => write!(f, "int"),
915 Token::TypeFloat => write!(f, "float"),
916 Token::TypeBool => write!(f, "bool"),
917 Token::And => write!(f, "&&"),
918 Token::Or => write!(f, "||"),
919 Token::EqEq => write!(f, "=="),
920 Token::NotEq => write!(f, "!="),
921 Token::Match => write!(f, "=~"),
922 Token::NotMatch => write!(f, "!~"),
923 Token::GtEq => write!(f, ">="),
924 Token::LtEq => write!(f, "<="),
925 Token::GtGt => write!(f, ">>"),
926 Token::StderrToStdout => write!(f, "2>&1"),
927 Token::StdoutToStderr => write!(f, "1>&2"),
928 Token::StdoutToStderr2 => write!(f, ">&2"),
929 Token::Stderr => write!(f, "2>"),
930 Token::Both => write!(f, "&>"),
931 Token::HereDocStart => write!(f, "<<"),
932 Token::HereString => write!(f, "<<<"),
933 Token::DoubleSemi => write!(f, ";;"),
934 Token::Eq => write!(f, "="),
935 Token::Pipe => write!(f, "|"),
936 Token::Amp => write!(f, "&"),
937 Token::Gt => write!(f, ">"),
938 Token::Lt => write!(f, "<"),
939 Token::Semi => write!(f, ";"),
940 Token::Colon => write!(f, ":"),
941 Token::Comma => write!(f, ","),
942 Token::Dot => write!(f, "."),
943 Token::DotDot => write!(f, ".."),
944 Token::Tilde => write!(f, "~"),
945 Token::TildePath(s) => write!(f, "{}", s),
946 Token::RelativePath(s) => write!(f, "{}", s),
947 Token::DotSlashPath(s) => write!(f, "{}", s),
948 Token::LBrace => write!(f, "{{"),
949 Token::RBrace => write!(f, "}}"),
950 Token::LBracket => write!(f, "["),
951 Token::RBracket => write!(f, "]"),
952 Token::LParen => write!(f, "("),
953 Token::RParen => write!(f, ")"),
954 Token::Star => write!(f, "*"),
955 Token::Bang => write!(f, "!"),
956 Token::Question => write!(f, "?"),
957 Token::GlobWord(s) => write!(f, "GLOB({})", s),
958 Token::Arithmetic(s) => write!(f, "ARITHMETIC({})", s),
959 Token::CmdSubstStart => write!(f, "$("),
960 Token::LongFlag(s) => write!(f, "--{}", s),
961 Token::ShortFlag(s) => write!(f, "-{}", s),
962 Token::PlusFlag(s) => write!(f, "+{}", s),
963 Token::DoubleDash => write!(f, "--"),
964 Token::PlusBare(s) => write!(f, "{}", s),
965 Token::MinusBare(s) => write!(f, "{}", s),
966 Token::MinusAlone => write!(f, "-"),
967 Token::String(s) => write!(f, "STRING({:?})", s),
968 Token::SingleString(s) => write!(f, "SINGLESTRING({:?})", s),
969 Token::HereDoc(d) => write!(f, "HEREDOC({:?}, literal={})", d.content, d.literal),
970 Token::VarRef(v) => write!(f, "VARREF({})", v),
971 Token::SimpleVarRef(v) => write!(f, "SIMPLEVARREF({})", v),
972 Token::Positional(n) => write!(f, "${}", n),
973 Token::AllArgs => write!(f, "$@"),
974 Token::ArgCount => write!(f, "$#"),
975 Token::LastExitCode => write!(f, "$?"),
976 Token::CurrentPid => write!(f, "$$"),
977 Token::VarLength(v) => write!(f, "${{#{}}}", v),
978 Token::Int(n) => write!(f, "INT({})", n),
979 Token::Float(n) => write!(f, "FLOAT({})", n),
980 Token::Path(s) => write!(f, "PATH({})", s),
981 Token::Ident(s) => write!(f, "IDENT({})", s),
982 Token::NumberIdent(s) => write!(f, "NUMIDENT({})", s),
983 Token::DottedIdent(s) => write!(f, "DOTIDENT({})", s),
984 Token::Comment => write!(f, "COMMENT"),
985 Token::Newline => write!(f, "NEWLINE"),
986 Token::LineContinuation => write!(f, "LINECONT"),
987 Token::InvalidFloatNoLeading => write!(f, "INVALID_FLOAT_NO_LEADING"),
989 Token::InvalidFloatNoTrailing => write!(f, "INVALID_FLOAT_NO_TRAILING"),
990 Token::BacktickRejected => write!(f, "BACKTICK_REJECTED"),
991 }
992 }
993}
994
995impl Token {
996 pub fn is_keyword(&self) -> bool {
1001 matches!(
1002 self,
1003 Token::Set
1004 | Token::Local
1005 | Token::If
1006 | Token::Then
1007 | Token::Else
1008 | Token::Elif
1009 | Token::Fi
1010 | Token::For
1011 | Token::In
1012 | Token::Do
1013 | Token::Done
1014 | Token::While
1015 | Token::Case
1016 | Token::Esac
1017 | Token::Function
1018 | Token::Return
1019 | Token::Break
1020 | Token::Continue
1021 | Token::Exit
1022 | Token::True
1023 | Token::False
1024 )
1025 }
1026
1027 pub fn is_type(&self) -> bool {
1029 matches!(
1030 self,
1031 Token::TypeString
1032 | Token::TypeInt
1033 | Token::TypeFloat
1034 | Token::TypeBool
1035 )
1036 }
1037
1038 pub fn starts_statement(&self) -> bool {
1041 matches!(
1042 self,
1043 Token::Set
1044 | Token::Local
1045 | Token::Function
1046 | Token::If
1047 | Token::For
1048 | Token::While
1049 | Token::Case
1050 | Token::Ident(_)
1051 | Token::LBracket
1052 )
1053 }
1054
1055 pub fn is_value(&self) -> bool {
1057 matches!(
1058 self,
1059 Token::String(_)
1060 | Token::SingleString(_)
1061 | Token::HereDoc(_)
1062 | Token::Arithmetic(_)
1063 | Token::Int(_)
1064 | Token::Float(_)
1065 | Token::True
1066 | Token::False
1067 | Token::VarRef(_)
1068 | Token::SimpleVarRef(_)
1069 | Token::CmdSubstStart
1070 | Token::Path(_)
1071 | Token::GlobWord(_)
1072 | Token::LastExitCode
1073 | Token::CurrentPid
1074 )
1075 }
1076}
1077
/// Output of `preprocess_arithmetic`.
struct ArithmeticPreprocessResult {
    /// Source text with every `$(( ... ))` replaced by a marker.
    text: String,
    /// `(marker, expression)` pairs, in the order the markers were emitted.
    arithmetics: Vec<(String, String)>,
    /// One entry per marker, recording where it sits in `text` and how long
    /// the marker and the original text are.
    replacements: Vec<SpanReplacement>,
}
1087
/// Copies a `$( ... )` command substitution verbatim into `result`.
///
/// On entry `chars[*i]` and `chars[*i + 1]` are expected to be `$` and `(`;
/// they are emitted without being inspected. Copying continues until the
/// matching `)` is consumed or the input ends. Parentheses inside single or
/// double quotes, and any character following an unquoted backslash, do not
/// affect nesting. `*i` advances by characters, `*source_pos` by UTF-8 bytes.
fn skip_command_substitution(
    chars: &[char],
    i: &mut usize,
    source_pos: &mut usize,
    result: &mut String,
) {
    /// Quoting state of the scanner.
    #[derive(Clone, Copy)]
    enum Quoting {
        Bare,
        Single,
        Double,
    }

    result.push_str("$(");
    *i += 2;
    *source_pos += 2;

    let mut quoting = Quoting::Bare;
    let mut open_parens: usize = 1;

    while open_parens > 0 {
        let Some(&current) = chars.get(*i) else { break };
        let following = chars.get(*i + 1).copied();
        // Set when `current` is a backslash that swallows the next character.
        let mut escaped: Option<char> = None;

        match quoting {
            Quoting::Single => {
                if current == '\'' {
                    quoting = Quoting::Bare;
                }
            }
            Quoting::Double => match (current, following) {
                // Only these four characters are escapable inside double quotes.
                ('\\', Some(next @ ('"' | '\\' | '$' | '`'))) => escaped = Some(next),
                ('"', _) => quoting = Quoting::Bare,
                _ => {}
            },
            Quoting::Bare => match current {
                '\'' => quoting = Quoting::Single,
                '"' => quoting = Quoting::Double,
                // A trailing backslash has nothing to escape and is copied alone.
                '\\' => escaped = following,
                '(' => open_parens += 1,
                ')' => open_parens -= 1,
                _ => {}
            },
        }

        result.push(current);
        *source_pos += current.len_utf8();
        *i += 1;

        if let Some(next) = escaped {
            result.push(next);
            *source_pos += next.len_utf8();
            *i += 1;
        }
    }
}
1185
1186fn preprocess_arithmetic(source: &str) -> Result<ArithmeticPreprocessResult, LexerError> {
1200 let mut result = String::with_capacity(source.len());
1201 let mut arithmetics: Vec<(String, String)> = Vec::new();
1202 let mut replacements: Vec<SpanReplacement> = Vec::new();
1203 let mut source_pos: usize = 0;
1204 let chars_vec: Vec<char> = source.chars().collect();
1205 let mut i = 0;
1206
1207 let mut in_double_quote = false;
1210
1211 while i < chars_vec.len() {
1212 let ch = chars_vec[i];
1213
1214 if !in_double_quote && ch == '\\' && i + 1 < chars_vec.len() {
1216 result.push(ch);
1217 result.push(chars_vec[i + 1]);
1218 source_pos += ch.len_utf8() + chars_vec[i + 1].len_utf8();
1219 i += 2;
1220 continue;
1221 }
1222
1223 if ch == '\'' && !in_double_quote {
1225 result.push(ch);
1226 i += 1;
1227 source_pos += 1;
1228 while i < chars_vec.len() && chars_vec[i] != '\'' {
1229 result.push(chars_vec[i]);
1230 source_pos += chars_vec[i].len_utf8();
1231 i += 1;
1232 }
1233 if i < chars_vec.len() {
1234 result.push(chars_vec[i]); source_pos += 1;
1236 i += 1;
1237 }
1238 continue;
1239 }
1240
1241 if ch == '"' {
1243 in_double_quote = !in_double_quote;
1244 result.push(ch);
1245 i += 1;
1246 source_pos += 1;
1247 continue;
1248 }
1249
1250 if in_double_quote && ch == '\\' && i + 1 < chars_vec.len() {
1252 let next = chars_vec[i + 1];
1253 if next == '"' || next == '\\' || next == '$' || next == '`' {
1254 result.push(ch);
1255 result.push(next);
1256 source_pos += ch.len_utf8() + next.len_utf8();
1257 i += 2;
1258 continue;
1259 }
1260 }
1261
1262 if ch == '#' && !in_double_quote {
1270 while i < chars_vec.len() && chars_vec[i] != '\n' && chars_vec[i] != '\r' {
1271 result.push(chars_vec[i]);
1272 source_pos += chars_vec[i].len_utf8();
1273 i += 1;
1274 }
1275 continue;
1276 }
1277
1278 if ch == '$' && i + 1 < chars_vec.len() && chars_vec[i + 1] == '('
1280 && !(i + 2 < chars_vec.len() && chars_vec[i + 2] == '(')
1281 {
1282 skip_command_substitution(&chars_vec, &mut i, &mut source_pos, &mut result);
1283 continue;
1284 }
1285
1286 if ch == '$' && i + 2 < chars_vec.len() && chars_vec[i + 1] == '(' && chars_vec[i + 2] == '(' {
1288 let arith_start_pos = result.len();
1289 let original_start = source_pos;
1290
1291 i += 3;
1293 source_pos += 3;
1294
1295 let mut expr = String::new();
1297 let mut paren_depth: usize = 0;
1298
1299 while i < chars_vec.len() {
1300 let c = chars_vec[i];
1301 match c {
1302 '(' => {
1303 paren_depth += 1;
1304 if paren_depth > MAX_PAREN_DEPTH {
1305 return Err(LexerError::NestingTooDeep);
1306 }
1307 expr.push('(');
1308 i += 1;
1309 source_pos += c.len_utf8();
1310 }
1311 ')' => {
1312 if paren_depth > 0 {
1313 paren_depth -= 1;
1314 expr.push(')');
1315 i += 1;
1316 source_pos += 1;
1317 } else if i + 1 < chars_vec.len() && chars_vec[i + 1] == ')' {
1318 i += 2;
1320 source_pos += 2;
1321 break;
1322 } else {
1323 expr.push(')');
1325 i += 1;
1326 source_pos += 1;
1327 }
1328 }
1329 _ => {
1330 expr.push(c);
1331 i += 1;
1332 source_pos += c.len_utf8();
1333 }
1334 }
1335 }
1336
1337 let original_len = source_pos - original_start;
1339
1340 let marker = format!("__KAISH_ARITH_{}__", unique_marker_id());
1342 let marker_len = marker.len();
1343
1344 replacements.push(SpanReplacement {
1346 preprocessed_pos: arith_start_pos,
1347 marker_len,
1348 original_len,
1349 });
1350
1351 arithmetics.push((marker.clone(), expr));
1352 result.push_str(&marker);
1353 } else {
1354 result.push(ch);
1355 i += 1;
1356 source_pos += ch.len_utf8();
1357 }
1358 }
1359
1360 Ok(ArithmeticPreprocessResult {
1361 text: result,
1362 arithmetics,
1363 replacements,
1364 })
1365}
1366
/// One heredoc extracted by `preprocess_heredocs`.
#[derive(Debug, Clone)]
struct HeredocReplacement {
    /// Marker written into the preprocessed text right after `<<`.
    marker: String,
    /// Body lines up to (not including) the closing delimiter line, with
    /// their original line endings and without any tab stripping applied.
    body: String,
    /// True when the delimiter was wrapped in single or double quotes.
    literal: bool,
    /// True when the heredoc was introduced with `<<-`.
    strip_tabs: bool,
    /// Byte offset in the original source of the first body character.
    body_start_offset: usize,
}
1389
1390fn preprocess_heredocs(source: &str) -> Result<(String, Vec<HeredocReplacement>), Spanned<LexerError>> {
1402 let mut result = String::with_capacity(source.len());
1403 let mut heredocs: Vec<HeredocReplacement> = Vec::new();
1404 let chars_vec: Vec<char> = source.chars().collect();
1405 let mut i = 0;
1406 let mut pos: usize = 0;
1410
1411 while i < chars_vec.len() {
1412 let ch = chars_vec[i];
1413
1414 if ch == '<'
1418 && chars_vec.get(i + 1) == Some(&'<')
1419 && chars_vec.get(i + 2) == Some(&'<')
1420 {
1421 result.push_str("<<<");
1422 i += 3;
1423 pos += 3;
1424 continue;
1425 }
1426
1427 if ch == '<' && chars_vec.get(i + 1) == Some(&'<') {
1429 let introducer_start = pos;
1432 i += 2; pos += 2;
1434
1435 let strip_tabs = chars_vec.get(i) == Some(&'-');
1437 if strip_tabs {
1438 i += 1;
1439 pos += 1;
1440 }
1441
1442 while let Some(&c) = chars_vec.get(i) {
1444 if c == ' ' || c == '\t' {
1445 i += 1;
1446 pos += 1;
1447 } else {
1448 break;
1449 }
1450 }
1451
1452 let mut delimiter = String::new();
1454 let quoted = chars_vec.get(i) == Some(&'\'') || chars_vec.get(i) == Some(&'"');
1455 let quote_char = if quoted {
1456 let q = chars_vec.get(i).copied();
1457 i += 1;
1458 pos += 1;
1459 q
1460 } else {
1461 None
1462 };
1463
1464 while let Some(&c) = chars_vec.get(i) {
1465 if quoted {
1466 if Some(c) == quote_char {
1467 i += 1; pos += 1;
1469 break;
1470 }
1471 } else if c.is_whitespace() || c == '\n' || c == '\r' {
1472 break;
1473 }
1474 delimiter.push(c);
1475 i += 1;
1476 pos += c.len_utf8();
1477 }
1478
1479 if delimiter.is_empty() {
1480 result.push_str("<<");
1482 if strip_tabs {
1483 result.push('-');
1484 }
1485 continue;
1486 }
1487
1488 let mut after_delimiter = String::new();
1491 while let Some(&c) = chars_vec.get(i) {
1492 if c == '\n' {
1493 i += 1;
1494 pos += 1;
1495 break;
1496 } else if c == '\r' {
1497 i += 1;
1498 pos += 1;
1499 if chars_vec.get(i) == Some(&'\n') {
1500 i += 1;
1501 pos += 1;
1502 }
1503 break;
1504 }
1505 after_delimiter.push(c);
1506 i += 1;
1507 pos += c.len_utf8();
1508 }
1509
1510 let body_start_offset = pos;
1516 let mut content = String::new();
1517 let mut current_line = String::new();
1518
1519 loop {
1520 let next = chars_vec.get(i).copied();
1521 match next {
1522 Some('\n') => {
1523 i += 1;
1524 pos += 1;
1525 let trimmed = if strip_tabs {
1527 current_line.trim_start_matches('\t')
1528 } else {
1529 ¤t_line
1530 };
1531 if trimmed == delimiter {
1532 break;
1534 }
1535 content.push_str(¤t_line);
1537 content.push('\n');
1538 current_line.clear();
1539 }
1540 Some('\r') => {
1541 i += 1;
1542 pos += 1;
1543 let crlf = chars_vec.get(i) == Some(&'\n');
1549 if crlf {
1550 i += 1;
1551 pos += 1;
1552 }
1553 let trimmed = if strip_tabs {
1554 current_line.trim_start_matches('\t')
1555 } else {
1556 ¤t_line
1557 };
1558 if trimmed == delimiter {
1559 break;
1560 }
1561 content.push_str(¤t_line);
1562 content.push_str(if crlf { "\r\n" } else { "\r" });
1563 current_line.clear();
1564 }
1565 Some(c) => {
1566 current_line.push(c);
1567 i += 1;
1568 pos += c.len_utf8();
1569 }
1570 None => {
1571 let trimmed = if strip_tabs {
1574 current_line.trim_start_matches('\t')
1575 } else {
1576 ¤t_line
1577 };
1578 if trimmed == delimiter {
1579 break;
1580 }
1581 let span_end = introducer_start
1586 + 2
1587 + if strip_tabs { 1 } else { 0 }
1588 + delimiter.len();
1589 return Err(Spanned::new(
1590 LexerError::UnterminatedHeredoc {
1591 delimiter: delimiter.clone(),
1592 },
1593 introducer_start..span_end,
1594 ));
1595 }
1596 }
1597 }
1598
1599 let marker = format!("__KAISH_HEREDOC_{}__", unique_marker_id());
1601 heredocs.push(HeredocReplacement {
1602 marker: marker.clone(),
1603 body: content,
1604 literal: quoted,
1605 strip_tabs,
1606 body_start_offset,
1607 });
1608
1609 result.push_str("<<");
1612 result.push_str(&marker);
1613 result.push_str(&after_delimiter);
1614 result.push('\n');
1615 } else {
1616 result.push(ch);
1617 i += 1;
1618 pos += ch.len_utf8();
1619 }
1620 }
1621
1622 Ok((result, heredocs))
1623}
1624
1625fn mergeable_text(token: &Token) -> Option<String> {
1630 match token {
1631 Token::Ident(s) => Some(s.clone()),
1632 Token::NumberIdent(s) => Some(s.clone()),
1633 Token::DottedIdent(s) => Some(s.clone()),
1634 Token::Colon => Some(":".to_string()),
1635 Token::Int(n) => Some(n.to_string()),
1636 Token::Path(p) => Some(p.clone()),
1637 Token::Float(f) => Some(f.to_string()),
1638 _ => None,
1639 }
1640}
1641
1642fn merge_colon_adjacent(tokens: Vec<Spanned<Token>>) -> Vec<Spanned<Token>> {
1651 if tokens.is_empty() {
1652 return tokens;
1653 }
1654
1655 let mut result = Vec::with_capacity(tokens.len());
1656 let mut run: Vec<&Spanned<Token>> = Vec::new();
1657
1658 for token in &tokens {
1659 if run.is_empty() {
1660 if mergeable_text(&token.token).is_some() {
1661 run.push(token);
1662 } else {
1663 result.push(token.clone());
1664 }
1665 continue;
1666 }
1667
1668 let Some(last) = run.last() else { unreachable!() };
1671 let adjacent = last.span.end == token.span.start;
1672
1673 if adjacent && mergeable_text(&token.token).is_some() {
1674 run.push(token);
1675 } else {
1676 flush_colon_run(&mut run, &mut result);
1677 if mergeable_text(&token.token).is_some() {
1678 run.push(token);
1679 } else {
1680 result.push(token.clone());
1681 }
1682 }
1683 }
1684
1685 flush_colon_run(&mut run, &mut result);
1686
1687 result
1688}
1689
1690fn flush_colon_run(run: &mut Vec<&Spanned<Token>>, result: &mut Vec<Spanned<Token>>) {
1692 if run.is_empty() {
1693 return;
1694 }
1695
1696 let has_colon = run.iter().any(|t| matches!(t.token, Token::Colon));
1697
1698 if run.len() >= 2 && has_colon {
1699 let text: String = run
1700 .iter()
1701 .filter_map(|t| mergeable_text(&t.token))
1702 .collect();
1703 let start = run.first().map(|t| t.span.start).unwrap_or(0);
1705 let end = run.last().map(|t| t.span.end).unwrap_or(0);
1706 result.push(Spanned::new(Token::Ident(text), start..end));
1707 } else {
1708 for t in run.iter() {
1709 result.push((*t).clone());
1710 }
1711 }
1712
1713 run.clear();
1714}
1715
1716fn glob_mergeable_text(token: &Token) -> Option<String> {
1721 match token {
1722 Token::Star => Some("*".to_string()),
1723 Token::Question => Some("?".to_string()),
1724 Token::Dot => Some(".".to_string()),
1725 Token::DotDot => Some("..".to_string()),
1726 Token::Ident(s) => Some(s.clone()),
1727 Token::NumberIdent(s) => Some(s.clone()),
1728 Token::DottedIdent(s) => Some(s.clone()),
1729 Token::Path(s) => Some(s.clone()),
1730 Token::Int(n) => Some(n.to_string()),
1731 Token::LBracket => Some("[".to_string()),
1732 Token::RBracket => Some("]".to_string()),
1733 Token::Bang => Some("!".to_string()),
1734 Token::DotSlashPath(s) => Some(s.clone()),
1735 Token::RelativePath(s) => Some(s.clone()),
1736 Token::TildePath(s) => Some(s.clone()),
1737 Token::Tilde => Some("~".to_string()),
1738 Token::LBrace => Some("{".to_string()),
1739 Token::RBrace => Some("}".to_string()),
1740 Token::Comma => Some(",".to_string()),
1741 _ => None,
1742 }
1743}
1744
1745fn merge_glob_adjacent(tokens: Vec<Spanned<Token>>) -> Vec<Spanned<Token>> {
1753 if tokens.is_empty() {
1754 return tokens;
1755 }
1756
1757 let mut result = Vec::with_capacity(tokens.len());
1758 let mut run: Vec<&Spanned<Token>> = Vec::new();
1759
1760 for token in &tokens {
1761 if run.is_empty() {
1762 if glob_mergeable_text(&token.token).is_some() {
1763 run.push(token);
1764 } else {
1765 result.push(token.clone());
1766 }
1767 continue;
1768 }
1769
1770 let Some(last) = run.last() else { unreachable!() };
1772 let adjacent = last.span.end == token.span.start;
1773
1774 if adjacent && glob_mergeable_text(&token.token).is_some() {
1775 run.push(token);
1776 } else {
1777 flush_glob_run(&mut run, &mut result);
1778 if glob_mergeable_text(&token.token).is_some() {
1779 run.push(token);
1780 } else {
1781 result.push(token.clone());
1782 }
1783 }
1784 }
1785
1786 flush_glob_run(&mut run, &mut result);
1787
1788 result
1789}
1790
1791fn flush_glob_run(run: &mut Vec<&Spanned<Token>>, result: &mut Vec<Spanned<Token>>) {
1793 if run.is_empty() {
1794 return;
1795 }
1796
1797 let has_glob = run.iter().any(|t| {
1798 matches!(t.token, Token::Star | Token::Question)
1799 }) || (run.iter().any(|t| matches!(t.token, Token::LBracket))
1800 && run.iter().any(|t| matches!(t.token, Token::RBracket)));
1801
1802 if run.len() >= 2 && has_glob {
1803 let text: String = run
1804 .iter()
1805 .filter_map(|t| glob_mergeable_text(&t.token))
1806 .collect();
1807 let start = run.first().map(|t| t.span.start).unwrap_or(0);
1808 let end = run.last().map(|t| t.span.end).unwrap_or(0);
1809 result.push(Spanned::new(Token::GlobWord(text), start..end));
1810 } else {
1811 for t in run.iter() {
1812 result.push((*t).clone());
1813 }
1814 }
1815
1816 run.clear();
1817}
1818
1819pub fn tokenize(source: &str) -> Result<Vec<Spanned<Token>>, Vec<Spanned<LexerError>>> {
1829 let arith_result = preprocess_arithmetic(source)
1831 .map_err(|e| vec![Spanned::new(e, 0..source.len())])?;
1832
1833 let span_replacements = arith_result.replacements;
1837 let (preprocessed, heredocs) = preprocess_heredocs(&arith_result.text)
1838 .map_err(|e| {
1839 let span = correct_span(e.span, &span_replacements);
1840 vec![Spanned::new(e.token, span)]
1841 })?;
1842
1843 let lexer = Token::lexer(&preprocessed);
1844 let mut tokens = Vec::new();
1845 let mut errors = Vec::new();
1846
1847 for (result, span) in lexer.spanned() {
1848 let corrected_span = correct_span(span, &span_replacements);
1850 match result {
1851 Ok(token) => {
1852 if !matches!(token, Token::Comment | Token::LineContinuation) {
1854 tokens.push(Spanned::new(token, corrected_span));
1855 }
1856 }
1857 Err(err) => {
1858 errors.push(Spanned::new(err, corrected_span));
1859 }
1860 }
1861 }
1862
1863 if !errors.is_empty() {
1864 return Err(errors);
1865 }
1866
1867 let mut final_tokens = Vec::with_capacity(tokens.len());
1869 let mut i = 0;
1870
1871 while i < tokens.len() {
1872 if let Token::Ident(ref name) = tokens[i].token
1874 && name.starts_with("__KAISH_ARITH_") && name.ends_with("__")
1875 && let Some((_, expr)) = arith_result.arithmetics.iter().find(|(marker, _)| marker == name) {
1876 final_tokens.push(Spanned::new(Token::Arithmetic(expr.clone()), tokens[i].span.clone()));
1877 i += 1;
1878 continue;
1879 }
1880
1881 if matches!(tokens[i].token, Token::HereDocStart) {
1883 if i + 1 < tokens.len()
1885 && let Token::Ident(ref name) = tokens[i + 1].token
1886 && name.starts_with("__KAISH_HEREDOC_") && name.ends_with("__") {
1887 if let Some(hd) = heredocs.iter().find(|h| h.marker == *name) {
1889 let mut content = hd.body.clone();
1901 for (marker, expr) in &arith_result.arithmetics {
1902 if content.contains(marker) {
1903 let replacement = if hd.literal {
1904 format!("$(({}))", expr)
1905 } else {
1906 format!("${{__ARITH:{}__}}", expr)
1907 };
1908 content = content.replace(marker, &replacement);
1909 }
1910 }
1911 final_tokens.push(Spanned::new(Token::HereDocStart, tokens[i].span.clone()));
1912 final_tokens.push(Spanned::new(
1913 Token::HereDoc(HereDocData {
1914 content,
1915 literal: hd.literal,
1916 strip_tabs: hd.strip_tabs,
1917 body_start_offset: hd.body_start_offset,
1918 }),
1919 tokens[i + 1].span.clone(),
1920 ));
1921 i += 2;
1922 continue;
1923 }
1924 }
1925 }
1926
1927 let token = if let Token::String(ref s) = tokens[i].token {
1929 let mut new_content = s.clone();
1931 for (marker, expr) in &arith_result.arithmetics {
1932 if new_content.contains(marker) {
1933 new_content = new_content.replace(marker, &format!("${{__ARITH:{}__}}", expr));
1936 }
1937 }
1938 if new_content != *s {
1939 Spanned::new(Token::String(new_content), tokens[i].span.clone())
1940 } else {
1941 tokens[i].clone()
1942 }
1943 } else {
1944 tokens[i].clone()
1945 };
1946 final_tokens.push(token);
1947 i += 1;
1948 }
1949
1950 Ok(merge_glob_adjacent(merge_colon_adjacent(final_tokens)))
1951}
1952
1953pub fn tokenize_with_comments(source: &str) -> Result<Vec<Spanned<Token>>, Vec<Spanned<LexerError>>> {
1957 let lexer = Token::lexer(source);
1958 let mut tokens = Vec::new();
1959 let mut errors = Vec::new();
1960
1961 for (result, span) in lexer.spanned() {
1962 match result {
1963 Ok(token) => {
1964 tokens.push(Spanned::new(token, span));
1965 }
1966 Err(err) => {
1967 errors.push(Spanned::new(err, span));
1968 }
1969 }
1970 }
1971
1972 if errors.is_empty() {
1973 Ok(tokens)
1974 } else {
1975 Err(errors)
1976 }
1977}
1978
1979pub fn parse_string_literal(source: &str) -> Result<String, LexerError> {
1981 if source.len() < 2 || !source.starts_with('"') || !source.ends_with('"') {
1983 return Err(LexerError::UnterminatedString);
1984 }
1985
1986 let inner = &source[1..source.len() - 1];
1987 let mut result = String::with_capacity(inner.len());
1988 let mut chars = inner.chars().peekable();
1989
1990 while let Some(ch) = chars.next() {
1991 if ch == '\\' {
1992 match chars.next() {
1993 Some('n') => result.push('\n'),
1994 Some('t') => result.push('\t'),
1995 Some('r') => result.push('\r'),
1996 Some('\\') => result.push('\\'),
1997 Some('"') => result.push('"'),
1998 Some('$') => result.push_str("__KAISH_ESCAPED_DOLLAR__"),
2001 Some('u') => {
2002 let mut hex = String::with_capacity(4);
2004 for _ in 0..4 {
2005 match chars.next() {
2006 Some(h) if h.is_ascii_hexdigit() => hex.push(h),
2007 _ => return Err(LexerError::InvalidEscape),
2008 }
2009 }
2010 let codepoint = u32::from_str_radix(&hex, 16)
2011 .map_err(|_| LexerError::InvalidEscape)?;
2012 let ch = char::from_u32(codepoint)
2013 .ok_or(LexerError::InvalidEscape)?;
2014 result.push(ch);
2015 }
2016 Some(next) => {
2018 result.push('\\');
2019 result.push(next);
2020 }
2021 None => return Err(LexerError::InvalidEscape),
2022 }
2023 } else {
2024 result.push(ch);
2025 }
2026 }
2027
2028 Ok(result)
2029}
2030
2031pub fn parse_var_ref(source: &str) -> Result<Vec<String>, LexerError> {
2034 if source.len() < 4 || !source.starts_with("${") || !source.ends_with('}') {
2036 return Err(LexerError::UnterminatedVarRef);
2037 }
2038
2039 let inner = &source[2..source.len() - 1];
2040
2041 if inner == "?" {
2043 return Ok(vec!["?".to_string()]);
2044 }
2045
2046 let mut segments = Vec::new();
2047 let mut current = String::new();
2048 let mut chars = inner.chars().peekable();
2049
2050 while let Some(ch) = chars.next() {
2051 match ch {
2052 '.' => {
2053 if !current.is_empty() {
2054 segments.push(current.clone());
2055 current.clear();
2056 }
2057 }
2058 '[' => {
2059 if !current.is_empty() {
2060 segments.push(current.clone());
2061 current.clear();
2062 }
2063 let mut index = String::from("[");
2065 while let Some(&c) = chars.peek() {
2066 if let Some(c) = chars.next() {
2067 index.push(c);
2068 }
2069 if c == ']' {
2070 break;
2071 }
2072 }
2073 segments.push(index);
2074 }
2075 _ => {
2076 current.push(ch);
2077 }
2078 }
2079 }
2080
2081 if !current.is_empty() {
2082 segments.push(current);
2083 }
2084
2085 Ok(segments)
2086}
2087
2088pub fn parse_int(source: &str) -> Result<i64, LexerError> {
2090 source.parse().map_err(|_| LexerError::InvalidNumber)
2091}
2092
2093pub fn parse_float(source: &str) -> Result<f64, LexerError> {
2095 source.parse().map_err(|_| LexerError::InvalidNumber)
2096}
2097
2098#[cfg(test)]
2099mod tests {
2100 use super::*;
2101
2102 fn lex(source: &str) -> Vec<Token> {
2103 tokenize(source)
2104 .expect("lexer should succeed")
2105 .into_iter()
2106 .map(|s| s.token)
2107 .collect()
2108 }
2109
2110 #[test]
2115 fn keywords() {
2116 assert_eq!(lex("set"), vec![Token::Set]);
2117 assert_eq!(lex("if"), vec![Token::If]);
2118 assert_eq!(lex("then"), vec![Token::Then]);
2119 assert_eq!(lex("else"), vec![Token::Else]);
2120 assert_eq!(lex("elif"), vec![Token::Elif]);
2121 assert_eq!(lex("fi"), vec![Token::Fi]);
2122 assert_eq!(lex("for"), vec![Token::For]);
2123 assert_eq!(lex("in"), vec![Token::In]);
2124 assert_eq!(lex("do"), vec![Token::Do]);
2125 assert_eq!(lex("done"), vec![Token::Done]);
2126 assert_eq!(lex("case"), vec![Token::Case]);
2127 assert_eq!(lex("esac"), vec![Token::Esac]);
2128 assert_eq!(lex("function"), vec![Token::Function]);
2129 assert_eq!(lex("true"), vec![Token::True]);
2130 assert_eq!(lex("false"), vec![Token::False]);
2131 }
2132
2133 #[test]
2134 fn double_semicolon() {
2135 assert_eq!(lex(";;"), vec![Token::DoubleSemi]);
2136 assert_eq!(lex("echo \"hi\";;"), vec![
2138 Token::Ident("echo".to_string()),
2139 Token::String("hi".to_string()),
2140 Token::DoubleSemi,
2141 ]);
2142 }
2143
2144 #[test]
2145 fn type_keywords() {
2146 assert_eq!(lex("string"), vec![Token::TypeString]);
2147 assert_eq!(lex("int"), vec![Token::TypeInt]);
2148 assert_eq!(lex("float"), vec![Token::TypeFloat]);
2149 assert_eq!(lex("bool"), vec![Token::TypeBool]);
2150 }
2151
2152 #[test]
2157 fn single_char_operators() {
2158 assert_eq!(lex("="), vec![Token::Eq]);
2159 assert_eq!(lex("|"), vec![Token::Pipe]);
2160 assert_eq!(lex("&"), vec![Token::Amp]);
2161 assert_eq!(lex(">"), vec![Token::Gt]);
2162 assert_eq!(lex("<"), vec![Token::Lt]);
2163 assert_eq!(lex(";"), vec![Token::Semi]);
2164 assert_eq!(lex(":"), vec![Token::Colon]);
2165 assert_eq!(lex(","), vec![Token::Comma]);
2166 assert_eq!(lex("."), vec![Token::Dot]);
2167 }
2168
2169 #[test]
2170 fn multi_char_operators() {
2171 assert_eq!(lex("&&"), vec![Token::And]);
2172 assert_eq!(lex("||"), vec![Token::Or]);
2173 assert_eq!(lex("=="), vec![Token::EqEq]);
2174 assert_eq!(lex("!="), vec![Token::NotEq]);
2175 assert_eq!(lex("=~"), vec![Token::Match]);
2176 assert_eq!(lex("!~"), vec![Token::NotMatch]);
2177 assert_eq!(lex(">="), vec![Token::GtEq]);
2178 assert_eq!(lex("<="), vec![Token::LtEq]);
2179 assert_eq!(lex(">>"), vec![Token::GtGt]);
2180 assert_eq!(lex("2>"), vec![Token::Stderr]);
2181 assert_eq!(lex("&>"), vec![Token::Both]);
2182 }
2183
2184 #[test]
2185 fn brackets() {
2186 assert_eq!(lex("{"), vec![Token::LBrace]);
2187 assert_eq!(lex("}"), vec![Token::RBrace]);
2188 assert_eq!(lex("["), vec![Token::LBracket]);
2189 assert_eq!(lex("]"), vec![Token::RBracket]);
2190 assert_eq!(lex("("), vec![Token::LParen]);
2191 assert_eq!(lex(")"), vec![Token::RParen]);
2192 }
2193
2194 #[test]
2199 fn integers() {
2200 assert_eq!(lex("0"), vec![Token::Int(0)]);
2201 assert_eq!(lex("42"), vec![Token::Int(42)]);
2202 assert_eq!(lex("-1"), vec![Token::Int(-1)]);
2203 assert_eq!(lex("999999"), vec![Token::Int(999999)]);
2204 }
2205
2206 #[test]
2207 fn floats() {
2208 assert_eq!(lex("3.14"), vec![Token::Float(3.14)]);
2209 assert_eq!(lex("-0.5"), vec![Token::Float(-0.5)]);
2210 assert_eq!(lex("123.456"), vec![Token::Float(123.456)]);
2211 }
2212
2213 #[test]
2214 fn strings() {
2215 assert_eq!(lex(r#""hello""#), vec![Token::String("hello".to_string())]);
2216 assert_eq!(lex(r#""hello world""#), vec![Token::String("hello world".to_string())]);
2217 assert_eq!(lex(r#""""#), vec![Token::String("".to_string())]); assert_eq!(lex(r#""with \"quotes\"""#), vec![Token::String("with \"quotes\"".to_string())]);
2219 assert_eq!(lex(r#""with\nnewline""#), vec![Token::String("with\nnewline".to_string())]);
2220 }
2221
2222 #[test]
2223 fn var_refs() {
2224 assert_eq!(lex("${X}"), vec![Token::VarRef("${X}".to_string())]);
2225 assert_eq!(lex("${VAR}"), vec![Token::VarRef("${VAR}".to_string())]);
2226 assert_eq!(lex("${VAR.field}"), vec![Token::VarRef("${VAR.field}".to_string())]);
2227 assert_eq!(lex("${VAR[0]}"), vec![Token::VarRef("${VAR[0]}".to_string())]);
2228 }
2229
2230 #[test]
2235 fn identifiers() {
2236 assert_eq!(lex("foo"), vec![Token::Ident("foo".to_string())]);
2237 assert_eq!(lex("foo_bar"), vec![Token::Ident("foo_bar".to_string())]);
2238 assert_eq!(lex("foo-bar"), vec![Token::Ident("foo-bar".to_string())]);
2239 assert_eq!(lex("_private"), vec![Token::Ident("_private".to_string())]);
2240 assert_eq!(lex("cmd123"), vec![Token::Ident("cmd123".to_string())]);
2241 }
2242
2243 #[test]
2244 fn keyword_prefix_identifiers() {
2245 assert_eq!(lex("setup"), vec![Token::Ident("setup".to_string())]);
2247 assert_eq!(lex("kaish-tools"), vec![Token::Ident("kaish-tools".to_string())]);
2248 assert_eq!(lex("iffy"), vec![Token::Ident("iffy".to_string())]);
2249 assert_eq!(lex("forked"), vec![Token::Ident("forked".to_string())]);
2250 assert_eq!(lex("done-with-it"), vec![Token::Ident("done-with-it".to_string())]);
2251 }
2252
2253 #[test]
2258 fn assignment() {
2259 assert_eq!(
2260 lex("set X = 5"),
2261 vec![Token::Set, Token::Ident("X".to_string()), Token::Eq, Token::Int(5)]
2262 );
2263 }
2264
2265 #[test]
2266 fn command_simple() {
2267 assert_eq!(lex("echo"), vec![Token::Ident("echo".to_string())]);
2268 assert_eq!(
2269 lex(r#"echo "hello""#),
2270 vec![Token::Ident("echo".to_string()), Token::String("hello".to_string())]
2271 );
2272 }
2273
2274 #[test]
2275 fn command_with_args() {
2276 assert_eq!(
2277 lex("cmd arg1 arg2"),
2278 vec![Token::Ident("cmd".to_string()), Token::Ident("arg1".to_string()), Token::Ident("arg2".to_string())]
2279 );
2280 }
2281
2282 #[test]
2283 fn command_with_named_args() {
2284 assert_eq!(
2285 lex("cmd key=value"),
2286 vec![Token::Ident("cmd".to_string()), Token::Ident("key".to_string()), Token::Eq, Token::Ident("value".to_string())]
2287 );
2288 }
2289
2290 #[test]
2291 fn pipeline() {
2292 assert_eq!(
2293 lex("a | b | c"),
2294 vec![Token::Ident("a".to_string()), Token::Pipe, Token::Ident("b".to_string()), Token::Pipe, Token::Ident("c".to_string())]
2295 );
2296 }
2297
2298 #[test]
2299 fn if_statement() {
2300 assert_eq!(
2301 lex("if true; then echo; fi"),
2302 vec![
2303 Token::If,
2304 Token::True,
2305 Token::Semi,
2306 Token::Then,
2307 Token::Ident("echo".to_string()),
2308 Token::Semi,
2309 Token::Fi
2310 ]
2311 );
2312 }
2313
2314 #[test]
2315 fn for_loop() {
2316 assert_eq!(
2317 lex("for X in items; do echo; done"),
2318 vec![
2319 Token::For,
2320 Token::Ident("X".to_string()),
2321 Token::In,
2322 Token::Ident("items".to_string()),
2323 Token::Semi,
2324 Token::Do,
2325 Token::Ident("echo".to_string()),
2326 Token::Semi,
2327 Token::Done
2328 ]
2329 );
2330 }
2331
2332 #[test]
2337 fn whitespace_ignored() {
2338 assert_eq!(lex(" set X = 5 "), lex("set X = 5"));
2339 }
2340
2341 #[test]
2342 fn newlines_preserved() {
2343 let tokens = lex("a\nb");
2344 assert_eq!(
2345 tokens,
2346 vec![Token::Ident("a".to_string()), Token::Newline, Token::Ident("b".to_string())]
2347 );
2348 }
2349
2350 #[test]
2351 fn multiple_newlines() {
2352 let tokens = lex("a\n\n\nb");
2353 assert_eq!(
2354 tokens,
2355 vec![Token::Ident("a".to_string()), Token::Newline, Token::Newline, Token::Newline, Token::Ident("b".to_string())]
2356 );
2357 }
2358
2359 #[test]
2364 fn comments_skipped() {
2365 assert_eq!(lex("# comment"), vec![]);
2366 assert_eq!(lex("a # comment"), vec![Token::Ident("a".to_string())]);
2367 assert_eq!(
2368 lex("a # comment\nb"),
2369 vec![Token::Ident("a".to_string()), Token::Newline, Token::Ident("b".to_string())]
2370 );
2371 }
2372
2373 #[test]
2374 fn comments_preserved_when_requested() {
2375 let tokens = tokenize_with_comments("a # comment")
2376 .expect("should succeed")
2377 .into_iter()
2378 .map(|s| s.token)
2379 .collect::<Vec<_>>();
2380 assert_eq!(tokens, vec![Token::Ident("a".to_string()), Token::Comment]);
2381 }
2382
2383 #[test]
2388 fn parse_simple_string() {
2389 assert_eq!(parse_string_literal(r#""hello""#).expect("ok"), "hello");
2390 }
2391
2392 #[test]
2393 fn parse_string_with_escapes() {
2394 assert_eq!(
2395 parse_string_literal(r#""hello\nworld""#).expect("ok"),
2396 "hello\nworld"
2397 );
2398 assert_eq!(
2399 parse_string_literal(r#""tab\there""#).expect("ok"),
2400 "tab\there"
2401 );
2402 assert_eq!(
2403 parse_string_literal(r#""quote\"here""#).expect("ok"),
2404 "quote\"here"
2405 );
2406 }
2407
2408 #[test]
2409 fn parse_string_with_unicode() {
2410 assert_eq!(
2411 parse_string_literal(r#""emoji \u2764""#).expect("ok"),
2412 "emoji ❤"
2413 );
2414 }
2415
2416 #[test]
2417 fn parse_string_with_escaped_dollar() {
2418 assert_eq!(
2421 parse_string_literal(r#""\$VAR""#).expect("ok"),
2422 "__KAISH_ESCAPED_DOLLAR__VAR"
2423 );
2424 assert_eq!(
2425 parse_string_literal(r#""cost: \$100""#).expect("ok"),
2426 "cost: __KAISH_ESCAPED_DOLLAR__100"
2427 );
2428 }
2429
2430 #[test]
2435 fn parse_simple_var() {
2436 assert_eq!(
2437 parse_var_ref("${X}").expect("ok"),
2438 vec!["X"]
2439 );
2440 }
2441
2442 #[test]
2443 fn parse_var_with_field() {
2444 assert_eq!(
2445 parse_var_ref("${VAR.field}").expect("ok"),
2446 vec!["VAR", "field"]
2447 );
2448 }
2449
2450 #[test]
2451 fn parse_var_with_index() {
2452 assert_eq!(
2453 parse_var_ref("${VAR[0]}").expect("ok"),
2454 vec!["VAR", "[0]"]
2455 );
2456 }
2457
2458 #[test]
2459 fn parse_var_nested() {
2460 assert_eq!(
2461 parse_var_ref("${VAR.field[0].nested}").expect("ok"),
2462 vec!["VAR", "field", "[0]", "nested"]
2463 );
2464 }
2465
2466 #[test]
2467 fn parse_last_result() {
2468 assert_eq!(
2469 parse_var_ref("${?}").expect("ok"),
2470 vec!["?"]
2471 );
2472 }
2473
2474 #[test]
2479 fn parse_integers() {
2480 assert_eq!(parse_int("0").expect("ok"), 0);
2481 assert_eq!(parse_int("42").expect("ok"), 42);
2482 assert_eq!(parse_int("-1").expect("ok"), -1);
2483 }
2484
2485 #[test]
2486 fn parse_floats() {
2487 assert!((parse_float("3.14").expect("ok") - 3.14).abs() < f64::EPSILON);
2488 assert!((parse_float("-0.5").expect("ok") - (-0.5)).abs() < f64::EPSILON);
2489 }
2490
2491 #[test]
2496 fn empty_input() {
2497 assert_eq!(lex(""), vec![]);
2498 }
2499
2500 #[test]
2501 fn only_whitespace() {
2502 assert_eq!(lex(" \t\t "), vec![]);
2503 }
2504
2505 #[test]
2506 fn json_array() {
2507 assert_eq!(
2508 lex(r#"[1, 2, 3]"#),
2509 vec![
2510 Token::LBracket,
2511 Token::Int(1),
2512 Token::Comma,
2513 Token::Int(2),
2514 Token::Comma,
2515 Token::Int(3),
2516 Token::RBracket
2517 ]
2518 );
2519 }
2520
2521 #[test]
2522 fn json_object() {
2523 assert_eq!(
2524 lex(r#"{"key": "value"}"#),
2525 vec![
2526 Token::LBrace,
2527 Token::String("key".to_string()),
2528 Token::Colon,
2529 Token::String("value".to_string()),
2530 Token::RBrace
2531 ]
2532 );
2533 }
2534
2535 #[test]
2536 fn redirect_operators() {
2537 assert_eq!(
2538 lex("cmd > file"),
2539 vec![Token::Ident("cmd".to_string()), Token::Gt, Token::Ident("file".to_string())]
2540 );
2541 assert_eq!(
2542 lex("cmd >> file"),
2543 vec![Token::Ident("cmd".to_string()), Token::GtGt, Token::Ident("file".to_string())]
2544 );
2545 assert_eq!(
2546 lex("cmd 2> err"),
2547 vec![Token::Ident("cmd".to_string()), Token::Stderr, Token::Ident("err".to_string())]
2548 );
2549 assert_eq!(
2550 lex("cmd &> all"),
2551 vec![Token::Ident("cmd".to_string()), Token::Both, Token::Ident("all".to_string())]
2552 );
2553 }
2554
2555 #[test]
2556 fn background_job() {
2557 assert_eq!(
2558 lex("cmd &"),
2559 vec![Token::Ident("cmd".to_string()), Token::Amp]
2560 );
2561 }
2562
2563 #[test]
2564 fn command_substitution() {
2565 assert_eq!(
2566 lex("$(cmd)"),
2567 vec![Token::CmdSubstStart, Token::Ident("cmd".to_string()), Token::RParen]
2568 );
2569 assert_eq!(
2570 lex("$(cmd arg)"),
2571 vec![
2572 Token::CmdSubstStart,
2573 Token::Ident("cmd".to_string()),
2574 Token::Ident("arg".to_string()),
2575 Token::RParen
2576 ]
2577 );
2578 assert_eq!(
2579 lex("$(a | b)"),
2580 vec![
2581 Token::CmdSubstStart,
2582 Token::Ident("a".to_string()),
2583 Token::Pipe,
2584 Token::Ident("b".to_string()),
2585 Token::RParen
2586 ]
2587 );
2588 }
2589
2590 #[test]
2591 fn complex_pipeline() {
2592 assert_eq!(
2593 lex(r#"cat file | grep pattern="foo" | head count=10"#),
2594 vec![
2595 Token::Ident("cat".to_string()),
2596 Token::Ident("file".to_string()),
2597 Token::Pipe,
2598 Token::Ident("grep".to_string()),
2599 Token::Ident("pattern".to_string()),
2600 Token::Eq,
2601 Token::String("foo".to_string()),
2602 Token::Pipe,
2603 Token::Ident("head".to_string()),
2604 Token::Ident("count".to_string()),
2605 Token::Eq,
2606 Token::Int(10),
2607 ]
2608 );
2609 }
2610
2611 #[test]
2616 fn short_flag() {
2617 assert_eq!(lex("-l"), vec![Token::ShortFlag("l".to_string())]);
2618 assert_eq!(lex("-a"), vec![Token::ShortFlag("a".to_string())]);
2619 assert_eq!(lex("-v"), vec![Token::ShortFlag("v".to_string())]);
2620 }
2621
2622 #[test]
2623 fn short_flag_combined() {
2624 assert_eq!(lex("-la"), vec![Token::ShortFlag("la".to_string())]);
2626 assert_eq!(lex("-vvv"), vec![Token::ShortFlag("vvv".to_string())]);
2627 }
2628
2629 #[test]
2630 fn long_flag() {
2631 assert_eq!(lex("--force"), vec![Token::LongFlag("force".to_string())]);
2632 assert_eq!(lex("--verbose"), vec![Token::LongFlag("verbose".to_string())]);
2633 assert_eq!(lex("--foo-bar"), vec![Token::LongFlag("foo-bar".to_string())]);
2634 }
2635
2636 #[test]
2637 fn double_dash() {
2638 assert_eq!(lex("--"), vec![Token::DoubleDash]);
2640 }
2641
2642 #[test]
2643 fn flags_vs_negative_numbers() {
2644 assert_eq!(lex("-123"), vec![Token::Int(-123)]);
2646 assert_eq!(lex("-l"), vec![Token::ShortFlag("l".to_string())]);
2648 assert_eq!(
2651 lex("-1 a"),
2652 vec![Token::Int(-1), Token::Ident("a".to_string())]
2653 );
2654 }
2655
2656 #[test]
2657 fn command_with_flags() {
2658 assert_eq!(
2659 lex("ls -l"),
2660 vec![
2661 Token::Ident("ls".to_string()),
2662 Token::ShortFlag("l".to_string()),
2663 ]
2664 );
2665 assert_eq!(
2666 lex("git commit -m"),
2667 vec![
2668 Token::Ident("git".to_string()),
2669 Token::Ident("commit".to_string()),
2670 Token::ShortFlag("m".to_string()),
2671 ]
2672 );
2673 assert_eq!(
2674 lex("git push --force"),
2675 vec![
2676 Token::Ident("git".to_string()),
2677 Token::Ident("push".to_string()),
2678 Token::LongFlag("force".to_string()),
2679 ]
2680 );
2681 }
2682
2683 #[test]
2684 fn flag_with_value() {
2685 assert_eq!(
2686 lex(r#"git commit -m "message""#),
2687 vec![
2688 Token::Ident("git".to_string()),
2689 Token::Ident("commit".to_string()),
2690 Token::ShortFlag("m".to_string()),
2691 Token::String("message".to_string()),
2692 ]
2693 );
2694 assert_eq!(
2695 lex(r#"--message="hello""#),
2696 vec![
2697 Token::LongFlag("message".to_string()),
2698 Token::Eq,
2699 Token::String("hello".to_string()),
2700 ]
2701 );
2702 }
2703
2704 #[test]
2705 fn end_of_flags_marker() {
2706 assert_eq!(
2707 lex("git checkout -- file"),
2708 vec![
2709 Token::Ident("git".to_string()),
2710 Token::Ident("checkout".to_string()),
2711 Token::DoubleDash,
2712 Token::Ident("file".to_string()),
2713 ]
2714 );
2715 }
2716
2717 #[test]
2722 fn local_keyword() {
2723 assert_eq!(lex("local"), vec![Token::Local]);
2724 assert_eq!(
2725 lex("local X = 5"),
2726 vec![Token::Local, Token::Ident("X".to_string()), Token::Eq, Token::Int(5)]
2727 );
2728 }
2729
2730 #[test]
2731 fn simple_var_ref() {
2732 assert_eq!(lex("$X"), vec![Token::SimpleVarRef("X".to_string())]);
2733 assert_eq!(lex("$foo"), vec![Token::SimpleVarRef("foo".to_string())]);
2734 assert_eq!(lex("$foo_bar"), vec![Token::SimpleVarRef("foo_bar".to_string())]);
2735 assert_eq!(lex("$_private"), vec![Token::SimpleVarRef("_private".to_string())]);
2736 }
2737
2738 #[test]
2739 fn simple_var_ref_in_command() {
2740 assert_eq!(
2741 lex("echo $NAME"),
2742 vec![Token::Ident("echo".to_string()), Token::SimpleVarRef("NAME".to_string())]
2743 );
2744 }
2745
2746 #[test]
2747 fn single_quoted_strings() {
2748 assert_eq!(lex("'hello'"), vec![Token::SingleString("hello".to_string())]);
2749 assert_eq!(lex("'hello world'"), vec![Token::SingleString("hello world".to_string())]);
2750 assert_eq!(lex("''"), vec![Token::SingleString("".to_string())]);
2751 assert_eq!(lex(r"'no $VAR here'"), vec![Token::SingleString("no $VAR here".to_string())]);
2753 assert_eq!(lex(r"'backslash \n stays'"), vec![Token::SingleString(r"backslash \n stays".to_string())]);
2754 }
2755
2756 #[test]
2757 fn test_brackets() {
2758 assert_eq!(lex("[["), vec![Token::LBracket, Token::LBracket]);
2760 assert_eq!(lex("]]"), vec![Token::RBracket, Token::RBracket]);
2761 assert_eq!(
2762 lex("[[ -f file ]]"),
2763 vec![
2764 Token::LBracket,
2765 Token::LBracket,
2766 Token::ShortFlag("f".to_string()),
2767 Token::Ident("file".to_string()),
2768 Token::RBracket,
2769 Token::RBracket
2770 ]
2771 );
2772 }
2773
2774 #[test]
2775 fn test_expression_syntax() {
2776 assert_eq!(
2777 lex(r#"[[ $X == "value" ]]"#),
2778 vec![
2779 Token::LBracket,
2780 Token::LBracket,
2781 Token::SimpleVarRef("X".to_string()),
2782 Token::EqEq,
2783 Token::String("value".to_string()),
2784 Token::RBracket,
2785 Token::RBracket
2786 ]
2787 );
2788 }
2789
2790 #[test]
2791 fn bash_style_assignment() {
2792 assert_eq!(
2794 lex(r#"NAME="value""#),
2795 vec![
2796 Token::Ident("NAME".to_string()),
2797 Token::Eq,
2798 Token::String("value".to_string())
2799 ]
2800 );
2801 }
2802
2803 #[test]
2804 fn positional_params() {
2805 assert_eq!(lex("$0"), vec![Token::Positional(0)]);
2806 assert_eq!(lex("$1"), vec![Token::Positional(1)]);
2807 assert_eq!(lex("$9"), vec![Token::Positional(9)]);
2808 assert_eq!(lex("$@"), vec![Token::AllArgs]);
2809 assert_eq!(lex("$#"), vec![Token::ArgCount]);
2810 }
2811
2812 #[test]
2813 fn positional_in_context() {
2814 assert_eq!(
2815 lex("echo $1 $2"),
2816 vec![
2817 Token::Ident("echo".to_string()),
2818 Token::Positional(1),
2819 Token::Positional(2),
2820 ]
2821 );
2822 }
2823
2824 #[test]
2825 fn var_length() {
2826 assert_eq!(lex("${#X}"), vec![Token::VarLength("X".to_string())]);
2827 assert_eq!(lex("${#NAME}"), vec![Token::VarLength("NAME".to_string())]);
2828 assert_eq!(lex("${#foo_bar}"), vec![Token::VarLength("foo_bar".to_string())]);
2829 }
2830
2831 #[test]
2832 fn var_length_in_context() {
2833 assert_eq!(
2834 lex("echo ${#NAME}"),
2835 vec![
2836 Token::Ident("echo".to_string()),
2837 Token::VarLength("NAME".to_string()),
2838 ]
2839 );
2840 }
2841
2842 #[test]
2847 fn plus_flag() {
2848 assert_eq!(lex("+e"), vec![Token::PlusFlag("e".to_string())]);
2850 assert_eq!(lex("+x"), vec![Token::PlusFlag("x".to_string())]);
2851 assert_eq!(lex("+ex"), vec![Token::PlusFlag("ex".to_string())]);
2852 }
2853
2854 #[test]
2855 fn set_with_plus_flag() {
2856 assert_eq!(
2857 lex("set +e"),
2858 vec![
2859 Token::Set,
2860 Token::PlusFlag("e".to_string()),
2861 ]
2862 );
2863 }
2864
2865 #[test]
2866 fn set_with_multiple_flags() {
2867 assert_eq!(
2868 lex("set -e -u"),
2869 vec![
2870 Token::Set,
2871 Token::ShortFlag("e".to_string()),
2872 Token::ShortFlag("u".to_string()),
2873 ]
2874 );
2875 }
2876
2877 #[test]
2878 fn flags_vs_negative_numbers_edge_cases() {
2879 assert_eq!(
2881 lex("-1 a"),
2882 vec![Token::Int(-1), Token::Ident("a".to_string())]
2883 );
2884 assert_eq!(lex("-l"), vec![Token::ShortFlag("l".to_string())]);
2886 assert_eq!(lex("-123"), vec![Token::Int(-123)]);
2888 }
2889
2890 #[test]
2891 fn single_dash_is_minus_alone() {
2892 let result = tokenize("-").expect("should lex");
2894 assert_eq!(result.len(), 1);
2895 assert!(matches!(result[0].token, Token::MinusAlone));
2896 }
2897
2898 #[test]
2899 fn plus_bare_for_date_format() {
2900 let result = tokenize("+%s").expect("should lex");
2902 assert_eq!(result.len(), 1);
2903 assert!(matches!(result[0].token, Token::PlusBare(ref s) if s == "+%s"));
2904
2905 let result = tokenize("+%Y-%m-%d").expect("should lex");
2907 assert_eq!(result.len(), 1);
2908 assert!(matches!(result[0].token, Token::PlusBare(ref s) if s == "+%Y-%m-%d"));
2909 }
2910
2911 #[test]
2912 fn plus_flag_still_works() {
2913 let result = tokenize("+e").expect("should lex");
2915 assert_eq!(result.len(), 1);
2916 assert!(matches!(result[0].token, Token::PlusFlag(ref s) if s == "e"));
2917 }
2918
2919 #[test]
2920 fn while_keyword_vs_while_loop() {
2921 assert_eq!(lex("while"), vec![Token::While]);
2923 assert_eq!(
2925 lex("while true"),
2926 vec![Token::While, Token::True]
2927 );
2928 }
2929
2930 #[test]
2931 fn control_flow_keywords() {
2932 assert_eq!(lex("break"), vec![Token::Break]);
2933 assert_eq!(lex("continue"), vec![Token::Continue]);
2934 assert_eq!(lex("return"), vec![Token::Return]);
2935 assert_eq!(lex("exit"), vec![Token::Exit]);
2936 }
2937
2938 #[test]
2939 fn control_flow_with_numbers() {
2940 assert_eq!(
2941 lex("break 2"),
2942 vec![Token::Break, Token::Int(2)]
2943 );
2944 assert_eq!(
2945 lex("continue 3"),
2946 vec![Token::Continue, Token::Int(3)]
2947 );
2948 assert_eq!(
2949 lex("exit 1"),
2950 vec![Token::Exit, Token::Int(1)]
2951 );
2952 }
2953
2954 #[test]
2959 fn heredoc_simple() {
2960 let source = "cat <<EOF\nhello\nworld\nEOF";
2961 let tokens = lex(source);
2962 assert_eq!(tokens, vec![
2964 Token::Ident("cat".to_string()),
2965 Token::HereDocStart,
2966 Token::HereDoc(HereDocData {
2967 content: "hello\nworld\n".to_string(),
2968 literal: false,
2969 strip_tabs: false,
2970 body_start_offset: 10,
2971 }),
2972 Token::Newline,
2973 ]);
2974 }
2975
2976 #[test]
2977 fn heredoc_empty() {
2978 let source = "cat <<EOF\nEOF";
2979 let tokens = lex(source);
2980 assert_eq!(tokens, vec![
2981 Token::Ident("cat".to_string()),
2982 Token::HereDocStart,
2983 Token::HereDoc(HereDocData {
2984 content: "".to_string(),
2985 literal: false,
2986 strip_tabs: false,
2987 body_start_offset: 10,
2988 }),
2989 Token::Newline,
2990 ]);
2991 }
2992
2993 #[test]
2994 fn heredoc_with_special_chars() {
2995 let source = "cat <<EOF\n$VAR and \"quoted\" 'single'\nEOF";
2996 let tokens = lex(source);
2997 assert_eq!(tokens, vec![
2998 Token::Ident("cat".to_string()),
2999 Token::HereDocStart,
3000 Token::HereDoc(HereDocData {
3001 content: "$VAR and \"quoted\" 'single'\n".to_string(),
3002 literal: false,
3003 strip_tabs: false,
3004 body_start_offset: 10,
3005 }),
3006 Token::Newline,
3007 ]);
3008 }
3009
/// A three-line heredoc using the delimiter `END` is expected to keep every
/// body line, each terminated by `\n`, in `content`.
#[test]
fn heredoc_multiline() {
    let lexed = lex("cat <<END\nline1\nline2\nline3\nEND");

    let body = HereDocData {
        content: String::from("line1\nline2\nline3\n"),
        literal: false,
        strip_tabs: false,
        // 10 equals the byte length of the opening `cat <<END\n` line.
        body_start_offset: 10,
    };
    let expected = vec![
        Token::Ident("cat".into()),
        Token::HereDocStart,
        Token::HereDoc(body),
        Token::Newline,
    ];

    assert_eq!(lexed, expected);
}
3026
/// A command on the line after the heredoc delimiter is expected to be
/// lexed normally, following the `HereDoc` and `Newline` tokens.
#[test]
fn heredoc_in_command() {
    let lexed = lex("cat <<EOF\nhello\nEOF\necho goodbye");

    let body = HereDocData {
        content: String::from("hello\n"),
        literal: false,
        strip_tabs: false,
        // 10 equals the byte length of the opening `cat <<EOF\n` line.
        body_start_offset: 10,
    };
    let expected = vec![
        Token::Ident("cat".into()),
        Token::HereDocStart,
        Token::HereDoc(body),
        Token::Newline,
        Token::Ident("echo".into()),
        Token::Ident("goodbye".into()),
    ];

    assert_eq!(lexed, expected);
}
3045
/// The `<<-` form is expected to set `strip_tabs: true` on the token.
///
/// The expected `content` still contains the leading tab of each body line;
/// only the tab-indented delimiter line is absent from it.
#[test]
fn heredoc_strip_tabs() {
    let lexed = lex("cat <<-EOF\n\thello\n\tworld\n\tEOF");

    let body = HereDocData {
        content: String::from("\thello\n\tworld\n"),
        literal: false,
        strip_tabs: true,
        // 11 equals the byte length of the opening `cat <<-EOF\n` line.
        body_start_offset: 11,
    };
    let expected = vec![
        Token::Ident("cat".into()),
        Token::HereDocStart,
        Token::HereDoc(body),
        Token::Newline,
    ];

    assert_eq!(lexed, expected);
}
3065
/// `$((...))` is expected to lex as one `Arithmetic` token holding the text
/// between the double parentheses.
#[test]
fn arithmetic_simple() {
    let lexed = lex("$((1 + 2))");
    assert_eq!(lexed, vec![Token::Arithmetic(String::from("1 + 2"))]);
}
3076
/// An arithmetic expansion on the right of `=` is expected to follow the
/// identifier and `Eq` tokens as a single `Arithmetic` token.
#[test]
fn arithmetic_in_assignment() {
    let lexed = lex("X=$((5 * 3))");

    let expected = vec![
        Token::Ident("X".into()),
        Token::Eq,
        Token::Arithmetic("5 * 3".into()),
    ];

    assert_eq!(lexed, expected);
}
3087
/// Parentheses nested inside `$((...))` are expected to stay part of the
/// `Arithmetic` token's text.
#[test]
fn arithmetic_with_nested_parens() {
    let lexed = lex("$((2 * (3 + 4)))");
    assert_eq!(lexed, vec![Token::Arithmetic(String::from("2 * (3 + 4)"))]);
}
3094
/// A bare variable name inside `$((...))` is expected to be kept as raw
/// text in the `Arithmetic` token.
#[test]
fn arithmetic_with_variable() {
    let lexed = lex("$((X + 1))");
    assert_eq!(lexed, vec![Token::Arithmetic(String::from("X + 1"))]);
}
3101
/// A single-paren `$(...)` is expected to lex as a command substitution
/// (`CmdSubstStart` ... `RParen`), not as an `Arithmetic` token.
#[test]
fn arithmetic_command_subst_not_confused() {
    let lexed = lex("$(echo hello)");

    let expected = vec![
        Token::CmdSubstStart,
        Token::Ident("echo".into()),
        Token::Ident("hello".into()),
        Token::RParen,
    ];

    assert_eq!(lexed, expected);
}
3114
/// 300 levels of nested parentheses inside `$((...))` are expected to make
/// `tokenize` fail with exactly one `NestingTooDeep` error.
///
/// NOTE(review): 300 is above `MAX_PAREN_DEPTH` (256); presumably that
/// constant is the limit being exercised here. Confirm against the lexer.
#[test]
fn arithmetic_nesting_limit() {
    let depth = 300;
    let source = format!("$(({}1{}))", "(".repeat(depth), ")".repeat(depth));

    let result = tokenize(&source);
    assert!(result.is_err());

    let errors = result.unwrap_err();
    assert_eq!(errors.len(), 1);
    assert_eq!(errors[0].token, LexerError::NestingTooDeep);
}
3127
/// A small amount of parenthesis nesting is expected to lex successfully,
/// with the inner parentheses kept in the `Arithmetic` text.
#[test]
fn arithmetic_nesting_within_limit() {
    let lexed = lex("$((((1 + 2) * 3)))");
    assert_eq!(lexed, vec![Token::Arithmetic(String::from("((1 + 2) * 3)"))]);
}
3135
/// An apostrophe inside a comment must not affect how `$((...))` on the
/// following line is lexed: the comment line contributes only a `Newline`.
#[test]
fn arithmetic_after_apostrophe_in_comment() {
    let lexed = lex("# this doesn't work\necho $((1+2))");

    let expected = vec![
        Token::Newline,
        Token::Ident("echo".into()),
        Token::Arithmetic("1+2".into()),
    ];

    assert_eq!(lexed, expected);
}
3159
/// `$((...))` written inside a comment must not yield an `Arithmetic`
/// token; the comment line contributes only a `Newline`.
#[test]
fn arithmetic_inside_comment_is_not_expanded() {
    let lexed = lex("# the $((y)) syntax explained\necho hello");

    let expected = vec![
        Token::Newline,
        Token::Ident("echo".into()),
        Token::Ident("hello".into()),
    ];

    assert_eq!(lexed, expected);
}
3171
/// A backtick-wrapped `$((...))` inside a comment must be ignored, while a
/// real arithmetic expansion on the next line is still lexed.
#[test]
fn backticked_arithmetic_in_comment_is_not_expanded() {
    let lexed = lex("# the `$((x))` syntax explained\necho $((3+4))");

    let expected = vec![
        Token::Newline,
        Token::Ident("echo".into()),
        Token::Arithmetic("3+4".into()),
    ];

    assert_eq!(lexed, expected);
}
3185
/// Two arithmetic assignments separated by `;` are each expected to lex as
/// identifier, `Eq`, `Arithmetic`, with a `Semi` token between them.
#[test]
fn arithmetic_still_works_outside_comments() {
    let lexed = lex("X=$((1+2)); Y=$((3*4))");

    let expected = vec![
        Token::Ident("X".into()),
        Token::Eq,
        Token::Arithmetic("1+2".into()),
        Token::Semi,
        Token::Ident("Y".into()),
        Token::Eq,
        Token::Arithmetic("3*4".into()),
    ];

    assert_eq!(lexed, expected);
}
3202
/// A `#` inside a double-quoted string must not start a comment: the input
/// is expected to lex as exactly an identifier followed by a string token.
///
/// Only the token kinds are checked here, not the string's content.
#[test]
fn arithmetic_inside_double_quotes_still_expands() {
    let lexed = lex("echo \"# $((1+2))\"");

    assert_eq!(lexed.len(), 2);

    let command = &lexed[0];
    let argument = &lexed[1];
    assert!(matches!(command, Token::Ident(_)));
    assert!(matches!(argument, Token::String(_)));
}
3217
/// Backtick command substitution in ordinary source is expected to make
/// `tokenize` fail with at least one `BackticksNotSupported` error.
#[test]
fn backtick_in_source_is_rejected() {
    let result = tokenize("echo `date`");
    assert!(result.is_err());

    let errors = result.unwrap_err();
    let reported = errors
        .iter()
        .any(|e| e.token == LexerError::BackticksNotSupported);
    assert!(reported);
}
3237
/// Backticks inside a comment are expected to be ignored along with the
/// rest of the comment; the comment line contributes only a `Newline`.
#[test]
fn backtick_in_comment_is_just_comment_text() {
    let lexed = lex("# use `date` here\necho hi");

    let expected = vec![
        Token::Newline,
        Token::Ident("echo".into()),
        Token::Ident("hi".into()),
    ];

    assert_eq!(lexed, expected);
}
3250
/// Backticks inside single quotes are expected to be kept verbatim in the
/// `SingleString` token.
#[test]
fn backtick_in_single_quoted_string_is_literal() {
    let lexed = lex("echo '`date`'");

    let expected = vec![
        Token::Ident("echo".into()),
        Token::SingleString("`date`".into()),
    ];

    assert_eq!(lexed, expected);
}
3262
/// Backticks inside double quotes are expected to survive into the
/// resulting `String` token rather than being rejected.
#[test]
fn backtick_in_double_quoted_string_is_literal() {
    let lexed = lex("echo \"`date`\"");

    assert_eq!(lexed.len(), 2);
    assert!(matches!(lexed[0], Token::Ident(_)));

    // The second token must be a double-quoted string that still holds a backtick.
    if let Token::String(text) = &lexed[1] {
        assert!(text.contains('`'));
    } else {
        panic!("expected Token::String, got {:?}", &lexed[1]);
    }
}
3278
/// Backticks inside a heredoc body are expected to be kept in the `HereDoc`
/// token's `content`.
#[test]
fn backtick_in_heredoc_body_is_preserved() {
    let lexed = lex("cat <<EOF\n`date`\nEOF\n");

    // Locate the first heredoc token; its absence fails the test.
    let body = lexed
        .iter()
        .find_map(|token| match token {
            Token::HereDoc(data) => Some(data),
            _ => None,
        })
        .expect("expected a HereDoc token");

    assert!(body.content.contains('`'));
}
3291
/// Spot-checks `Token::category()` for representative tokens of every
/// `TokenCategory` variant exercised below.
///
/// Payload values are arbitrary; only the variant is expected to matter.
#[test]
fn token_categories() {
    // Keywords (control flow, literals such as `true`, and type names).
    assert_eq!(Token::If.category(), TokenCategory::Keyword);
    assert_eq!(Token::Then.category(), TokenCategory::Keyword);
    assert_eq!(Token::For.category(), TokenCategory::Keyword);
    assert_eq!(Token::Function.category(), TokenCategory::Keyword);
    assert_eq!(Token::True.category(), TokenCategory::Keyword);
    assert_eq!(Token::TypeString.category(), TokenCategory::Keyword);

    // Operators.
    assert_eq!(Token::Pipe.category(), TokenCategory::Operator);
    assert_eq!(Token::And.category(), TokenCategory::Operator);
    assert_eq!(Token::Or.category(), TokenCategory::Operator);
    assert_eq!(Token::StderrToStdout.category(), TokenCategory::Operator);
    assert_eq!(Token::GtGt.category(), TokenCategory::Operator);

    // Strings, including heredoc bodies.
    assert_eq!(Token::String("test".to_string()).category(), TokenCategory::String);
    assert_eq!(Token::SingleString("test".to_string()).category(), TokenCategory::String);
    assert_eq!(
        Token::HereDoc(HereDocData {
            content: "test".to_string(),
            literal: false,
            strip_tabs: false,
            body_start_offset: 0,
        }).category(),
        TokenCategory::String,
    );

    // Numbers. The float is deliberately not `3.14`: that literal trips the
    // deny-by-default `clippy::approx_constant` lint when tests are linted.
    assert_eq!(Token::Int(42).category(), TokenCategory::Number);
    assert_eq!(Token::Float(2.5).category(), TokenCategory::Number);
    assert_eq!(Token::Arithmetic("1+2".to_string()).category(), TokenCategory::Number);

    // Variables and special parameters.
    assert_eq!(Token::SimpleVarRef("X".to_string()).category(), TokenCategory::Variable);
    assert_eq!(Token::VarRef("${X}".to_string()).category(), TokenCategory::Variable);
    assert_eq!(Token::Positional(1).category(), TokenCategory::Variable);
    assert_eq!(Token::AllArgs.category(), TokenCategory::Variable);
    assert_eq!(Token::ArgCount.category(), TokenCategory::Variable);
    assert_eq!(Token::LastExitCode.category(), TokenCategory::Variable);
    assert_eq!(Token::CurrentPid.category(), TokenCategory::Variable);

    // Flags.
    assert_eq!(Token::ShortFlag("l".to_string()).category(), TokenCategory::Flag);
    assert_eq!(Token::LongFlag("verbose".to_string()).category(), TokenCategory::Flag);
    assert_eq!(Token::PlusFlag("e".to_string()).category(), TokenCategory::Flag);
    assert_eq!(Token::DoubleDash.category(), TokenCategory::Flag);

    // Punctuation.
    assert_eq!(Token::Semi.category(), TokenCategory::Punctuation);
    assert_eq!(Token::LParen.category(), TokenCategory::Punctuation);
    assert_eq!(Token::LBracket.category(), TokenCategory::Punctuation);
    assert_eq!(Token::Newline.category(), TokenCategory::Punctuation);

    // Comments.
    assert_eq!(Token::Comment.category(), TokenCategory::Comment);

    // Paths.
    assert_eq!(Token::Path("/tmp/file".to_string()).category(), TokenCategory::Path);

    // Command-like words.
    assert_eq!(Token::Ident("echo".to_string()).category(), TokenCategory::Command);
    assert_eq!(Token::NumberIdent("019dda1c".to_string()).category(), TokenCategory::Command);
    assert_eq!(Token::DottedIdent(".gitignore".to_string()).category(), TokenCategory::Command);

    // Error tokens.
    assert_eq!(Token::InvalidFloatNoLeading.category(), TokenCategory::Error);
    assert_eq!(Token::InvalidFloatNoTrailing.category(), TokenCategory::Error);
}
3367
/// When a pipe follows the heredoc marker on the opening line, the `Pipe`
/// token is expected to appear after the `HereDoc` token in the output.
#[test]
fn test_heredoc_piped_to_command() {
    let tokens = tokenize("cat <<EOF | jq\n{\"key\": \"val\"}\nEOF").unwrap();

    let heredoc_at = tokens
        .iter()
        .position(|t| matches!(t.token, Token::HereDoc(_)))
        .expect("should have a heredoc token");
    let pipe_at = tokens
        .iter()
        .position(|t| matches!(t.token, Token::Pipe))
        .expect("should have a pipe token");

    assert!(
        pipe_at > heredoc_at,
        "Pipe must come after heredoc, got heredoc at {}, pipe at {}. Tokens: {:?}",
        heredoc_at, pipe_at, tokens,
    );
}
3383
/// A heredoc with no pipe on its opening line is expected to yield a
/// `HereDoc` token and no `Pipe` token.
#[test]
fn test_heredoc_standalone_still_works() {
    let tokens = tokenize("cat <<EOF\nhello\nEOF").unwrap();

    let has_heredoc = tokens.iter().any(|t| matches!(t.token, Token::HereDoc(_)));
    assert!(has_heredoc);

    let pipe_free = tokens.iter().all(|t| !matches!(t.token, Token::Pipe));
    assert!(pipe_free);
}
3391
/// An empty first body line is expected to be kept, so `content` starts
/// with `\n`.
#[test]
fn test_heredoc_preserves_leading_empty_lines() {
    let tokens = tokenize("cat <<EOF\n\nhello\nEOF").unwrap();

    let data = tokens
        .iter()
        .find_map(|t| match &t.token {
            Token::HereDoc(found) => Some(found.clone()),
            _ => None,
        })
        .expect("should have a heredoc token");

    assert!(data.content.starts_with('\n'), "leading empty line must be preserved, got: {:?}", data.content);
    assert_eq!(data.content, "\nhello\n");
}
3408
/// A single-quoted delimiter (`<<'EOF'`) is expected to set `literal` on
/// the heredoc and leave `$HOME` untouched in `content`.
#[test]
fn test_heredoc_quoted_delimiter_sets_literal() {
    let tokens = tokenize("cat <<'EOF'\nhello $HOME\nEOF").unwrap();

    let data = tokens
        .iter()
        .find_map(|t| match &t.token {
            Token::HereDoc(found) => Some(found.clone()),
            _ => None,
        })
        .expect("should have a heredoc token");

    assert!(data.literal, "quoted delimiter should set literal=true");
    assert_eq!(data.content, "hello $HOME\n");
}
3425
/// An unquoted delimiter is expected to leave `literal` unset on the
/// heredoc token.
#[test]
fn test_heredoc_unquoted_delimiter_not_literal() {
    let tokens = tokenize("cat <<EOF\nhello $HOME\nEOF").unwrap();

    let data = tokens
        .iter()
        .find_map(|t| match &t.token {
            Token::HereDoc(found) => Some(found.clone()),
            _ => None,
        })
        .expect("should have a heredoc token");

    assert!(!data.literal, "unquoted delimiter should have literal=false");
}
3441
/// A `::` inside a word is expected to stay part of one `Ident` token.
#[test]
fn colon_double_in_word() {
    let lexed = lex("foo::bar");
    assert_eq!(lexed, vec![Token::Ident(String::from("foo::bar"))]);
}
3450
/// Single colons between word characters are expected to stay part of one
/// `Ident` token.
#[test]
fn colon_single_in_word() {
    let lexed = lex("a:b:c");
    assert_eq!(lexed, vec![Token::Ident(String::from("a:b:c"))]);
}
3455
/// A `host:port` word is expected to lex as one `Ident`, digits included.
#[test]
fn colon_with_port() {
    let lexed = lex("host:8080");
    assert_eq!(lexed, vec![Token::Ident(String::from("host:8080"))]);
}
3460
/// A lone `:` is expected to lex as the `Colon` token.
#[test]
fn colon_standalone() {
    let lexed = lex(":");
    assert_eq!(lexed, vec![Token::Colon]);
}
3465
/// A colon surrounded by spaces must not be merged into its neighbours: it
/// is expected to lex as a separate `Colon` between two identifiers.
#[test]
fn colon_spaced_no_merge() {
    let lexed = lex("foo : bar");

    let expected = vec![
        Token::Ident(String::from("foo")),
        Token::Colon,
        Token::Ident(String::from("bar")),
    ];

    assert_eq!(lexed, expected);
}
3477
/// A colon-containing word used as a command argument is expected to lex
/// as one `Ident`, separate from the command name.
#[test]
fn colon_in_command_arg() {
    let lexed = lex("echo foo::bar");

    let expected = vec![
        Token::Ident(String::from("echo")),
        Token::Ident(String::from("foo::bar")),
    ];

    assert_eq!(lexed, expected);
}
3488
/// A colon directly after a word is expected to be absorbed into that
/// word's `Ident` token.
#[test]
fn colon_trailing() {
    let lexed = lex("foo:");
    assert_eq!(lexed, vec![Token::Ident(String::from("foo:"))]);
}
3494
/// A colon directly before a word is expected to be absorbed into that
/// word's `Ident` token.
#[test]
fn colon_leading() {
    let lexed = lex(":foo");
    assert_eq!(lexed, vec![Token::Ident(String::from(":foo"))]);
}
3500
/// A path-like word followed by `:8080` is expected to lex as a single
/// `Ident` token, not a `Path`.
#[test]
fn colon_with_path() {
    let lexed = lex("/usr/bin:8080");
    assert_eq!(lexed, vec![Token::Ident(String::from("/usr/bin:8080"))]);
}
3509
/// `is_keyword()` must return true for each control-flow token listed here.
#[test]
fn is_keyword_covers_control_flow() {
    let control_flow = [
        Token::While,
        Token::Return,
        Token::Break,
        Token::Continue,
        Token::Exit,
    ];

    control_flow
        .iter()
        .for_each(|t| assert!(t.is_keyword(), "{t:?} should be a keyword"));
}
3526
/// `starts_statement()` must return true for `Token::While`.
#[test]
fn starts_statement_covers_while() {
    let token = Token::While;
    assert!(token.starts_statement());
}
3531
/// `is_keyword()` must return false for operator and punctuation tokens.
#[test]
fn is_keyword_rejects_operators() {
    let non_keywords = [Token::Pipe, Token::Amp, Token::Eq, Token::LBrace];

    non_keywords
        .iter()
        .for_each(|t| assert!(!t.is_keyword(), "{t:?} should not be a keyword"));
}
3538}