/// A lexical token kind, together with any payload carried by that kind.
///
/// The word-shaped operator variants (`X`, `StrEq` through `StrCmp`, `LogAndWord`,
/// `LogOrWord`, `LogNotWord`) are the ones [`keyword_or_ident`] returns for the words
/// `x`, `eq`, `ne`, `lt`, `gt`, `le`, `ge`, `cmp`, `and`, `or` and `not`.
///
/// NOTE(review): the code that produces the remaining variants is not visible here, so
/// the source spellings and payload meanings mentioned in the comments below are
/// inferred from variant names and payload types only — confirm against the lexer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // ---- Literals ----
    /// Integer literal; the payload is an `i64`.
    Integer(i64),
    /// Floating-point literal; the payload is an `f64`.
    Float(f64),
    /// String payload; presumably the text of a single-quoted literal — TODO confirm.
    SingleString(String),
    /// String payload; presumably the text of a double-quoted literal — TODO confirm.
    DoubleString(String),
    /// String payload; presumably the text of a backtick-quoted literal — TODO confirm.
    BacktickString(String),
    /// Regex literal with two strings and a char.
    /// NOTE(review): presumably (pattern, flags, delimiter) — confirm field order.
    Regex(String, String, char),
    /// Here-document with two strings and a flag.
    /// NOTE(review): presumably (tag, body, interpolate?) — confirm field meaning.
    HereDoc(String, String, bool),
    /// List of words; presumably produced by a `qw` construct — TODO confirm.
    QW(Vec<String>),

    // ---- Variables ----
    /// Scalar variable; the payload is presumably the name without its sigil — TODO confirm.
    ScalarVar(String),
    /// Dereferenced scalar variable; exact source spelling not visible here — TODO confirm.
    DerefScalarVar(String),
    /// Array variable carrying a name.
    ArrayVar(String),
    /// Hash variable carrying a name.
    HashVar(String),
    /// Payload-free token; presumably a bare `@` — TODO confirm.
    ArrayAt,
    /// Payload-free token; presumably a bare `%` in hash-sigil position — TODO confirm.
    HashPercent,

    // ---- Names and declarations ----
    /// Bare word. This is the fallback result of [`keyword_or_ident`] for any word that
    /// is not one of its eleven operator words.
    Ident(String),
    /// Label carrying a name.
    Label(String),
    /// Package separator; presumably `::` — TODO confirm.
    PackageSep,
    /// Format declaration: a name plus a list of lines.
    FormatDecl {
        /// Name given to the format.
        name: String,
        /// Lines belonging to the declaration; whether they are raw or processed is
        /// not visible here.
        lines: Vec<String>,
    },

    // ---- Arithmetic operators (presumably `+ - * / % **`) ----
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Power,

    // ---- String operators ----
    /// Presumably the concatenation operator `.` — TODO confirm.
    Dot,
    /// Returned by [`keyword_or_ident`] for the word `x`.
    X,

    // ---- Numeric comparison (presumably `== != < > <= >= <=>`) ----
    NumEq,
    NumNe,
    NumLt,
    NumGt,
    NumLe,
    NumGe,
    Spaceship,

    // ---- String comparison ----
    // Returned by `keyword_or_ident` for `eq`, `ne`, `lt`, `gt`, `le`, `ge`, `cmp`
    // respectively.
    StrEq,
    StrNe,
    StrLt,
    StrGt,
    StrLe,
    StrGe,
    StrCmp,

    // ---- Logical operators ----
    // The `*Word` variants are returned by `keyword_or_ident` for `and`, `or`, `not`;
    // the others are presumably the symbolic forms (`&& || ! //`) — TODO confirm.
    LogAnd,
    LogOr,
    LogNot,
    LogAndWord,
    LogOrWord,
    LogNotWord,
    DefinedOr,

    // ---- Bitwise operators (presumably `& | ^ ~ << >>`) ----
    BitAnd,
    BitOr,
    BitXor,
    BitNot,
    ShiftLeft,
    ShiftRight,

    // ---- Assignment operators ----
    // Plain assignment plus compound forms; spellings inferred from names — TODO confirm.
    Assign,
    PlusAssign,
    MinusAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    PowAssign,
    DotAssign,
    AndAssign,
    OrAssign,
    XorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    BitAndAssign,
    BitOrAssign,
    DefinedOrAssign,

    // ---- Increment / decrement (presumably `++` and `--`) ----
    Increment,
    Decrement,

    // ---- Regex binding (presumably `=~` and `!~`) ----
    BindMatch,
    BindNotMatch,

    // ---- Arrows, pipes, ranges ----
    /// Presumably `->` — TODO confirm.
    Arrow,
    /// Presumably `=>` — TODO confirm.
    FatArrow,
    /// Pipe-forward operator; spelling not visible here — TODO confirm.
    PipeForward,
    // The six `ThreadArrow*` variants are all accepted by `Token::is_term_start`.
    // NOTE(review): their spellings and the meaning of the `Last` / `Stream` / `Par`
    // suffixes are not visible here — confirm against the lexer.
    ThreadArrow,
    ThreadArrowLast,
    ThreadArrowStream,
    ThreadArrowStreamLast,
    ThreadArrowPar,
    ThreadArrowParLast,
    /// Range operator; presumably `..` — TODO confirm.
    Range,
    /// Exclusive range operator; spelling not visible here — TODO confirm.
    RangeExclusive,
    /// Backslash; accepted by `Token::is_term_start`.
    Backslash,

    // ---- Delimiters ----
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    /// Combined arrow-plus-brace token; exact spelling not visible here — TODO confirm.
    ArrowBrace,

    // ---- Punctuation ----
    Semicolon,
    Comma,
    Question,
    Colon,

    // ---- Input operators ----
    /// Presumably the bare `<>` operator — TODO confirm.
    Diamond,
    /// Read-line operator carrying a string; presumably the handle name — TODO confirm.
    ReadLine(String),

    /// File-test operator carrying a single char; presumably the test letter — TODO confirm.
    FileTest(char),

    // ---- Stream markers ----
    /// End-of-input marker.
    Eof,
    /// Line break.
    Newline,
}
167
168impl Token {
169 pub fn is_term_start(&self) -> bool {
170 matches!(
171 self,
172 Token::Integer(_)
173 | Token::Float(_)
174 | Token::SingleString(_)
175 | Token::DoubleString(_)
176 | Token::BacktickString(_)
177 | Token::ScalarVar(_)
178 | Token::DerefScalarVar(_)
179 | Token::ArrayVar(_)
180 | Token::HashVar(_)
181 | Token::Ident(_)
182 | Token::LParen
183 | Token::LBracket
184 | Token::LBrace
185 | Token::Backslash
186 | Token::Minus
187 | Token::LogNot
188 | Token::BitNot
189 | Token::LogNotWord
190 | Token::QW(_)
191 | Token::Regex(_, _, _)
192 | Token::FileTest(_)
193 | Token::ThreadArrow
194 | Token::ThreadArrowLast
195 | Token::ThreadArrowStream
196 | Token::ThreadArrowStreamLast
197 | Token::ThreadArrowPar
198 | Token::ThreadArrowParLast
199 )
200 }
201}
202
203pub fn keyword_or_ident(word: &str) -> Token {
205 match word {
206 "x" => Token::X,
207 "eq" => Token::StrEq,
208 "ne" => Token::StrNe,
209 "lt" => Token::StrLt,
210 "gt" => Token::StrGt,
211 "le" => Token::StrLe,
212 "ge" => Token::StrGe,
213 "cmp" => Token::StrCmp,
214 "and" => Token::LogAndWord,
215 "or" => Token::LogOrWord,
216 "not" => Token::LogNotWord,
217 _ => Token::Ident(word.to_string()),
218 }
219}
220
/// Flat list of reserved words and builtin function names.
///
/// Each name appears exactly once; entries keep the position of their first
/// occurrence in the historical list. This table is independent of
/// [`keyword_or_ident`], which still returns `Token::Ident` for these words.
///
/// NOTE(review): the consumers of this table are not visible here (presumably
/// completion or highlighting) — confirm that none depends on a fixed length or index.
pub const KEYWORDS: &[&str] = &[
    "frozen",
    "typed",
    "my",
    "mysync",
    "our",
    "oursync",
    "local",
    "sub",
    "fn",
    "struct",
    "enum",
    "class",
    "trait",
    "extends",
    "impl",
    "pub",
    "priv",
    "Self",
    "return",
    "if",
    "elsif",
    "else",
    "unless",
    "while",
    "until",
    "for",
    "foreach",
    "do",
    "last",
    "next",
    "redo",
    "use",
    "no",
    "require",
    "package",
    "bless",
    "print",
    "say",
    "die",
    "warn",
    "chomp",
    "chop",
    "push",
    "pop",
    "shift",
    "shuffle",
    "chunked",
    "windowed",
    "unshift",
    "splice",
    "split",
    "join",
    "json_decode",
    "json_encode",
    "json_jq",
    "jwt_decode",
    "jwt_decode_unsafe",
    "jwt_encode",
    "log_debug",
    "log_error",
    "log_info",
    "log_json",
    "log_level",
    "log_trace",
    "log_warn",
    "sha256",
    "sha1",
    "md5",
    "hmac_sha256",
    "hmac",
    "uuid",
    "base64_encode",
    "base64_decode",
    "hex_encode",
    "hex_decode",
    "gzip",
    "gunzip",
    "zstd",
    "zstd_decode",
    "datetime_utc",
    "datetime_from_epoch",
    "datetime_parse_rfc3339",
    "datetime_strftime",
    "toml_decode",
    "toml_encode",
    "yaml_decode",
    "yaml_encode",
    "url_encode",
    "url_decode",
    "uri_escape",
    "uri_unescape",
    "sort",
    "reverse",
    "reversed",
    "map",
    "maps",
    "flat_map",
    "flat_maps",
    "flatten",
    "compact",
    "reject",
    "grepv",
    "concat",
    "chain",
    "set",
    "list_count",
    "list_size",
    "count",
    "size",
    "cnt",
    "inject",
    "first",
    "detect",
    "find",
    "find_all",
    "match",
    "grep",
    "greps",
    "keys",
    "values",
    "each",
    "delete",
    "exists",
    "open",
    "close",
    "read",
    "write",
    "seek",
    "tell",
    "eof",
    "defined",
    "undef",
    "ref",
    "eval",
    "exec",
    "system",
    "chdir",
    "mkdir",
    "rmdir",
    "unlink",
    "rename",
    "chmod",
    "chown",
    "length",
    "substr",
    "index",
    "rindex",
    "sprintf",
    "printf",
    "lc",
    "uc",
    "lcfirst",
    "ucfirst",
    "hex",
    "oct",
    "int",
    "abs",
    "sqrt",
    "scalar",
    "wantarray",
    "caller",
    "exit",
    "pos",
    "quotemeta",
    "chr",
    "ord",
    "pack",
    "unpack",
    "vec",
    "tie",
    "untie",
    "tied",
    "dump",
    "formline",
    "lock",
    "prototype",
    "reset",
    "BEGIN",
    "END",
    "INIT",
    "CHECK",
    "UNITCHECK",
    "AUTOLOAD",
    "DESTROY",
    "all",
    "any",
    "none",
    "take_while",
    "drop_while",
    "skip_while",
    "skip",
    "first_or",
    "tap",
    "peek",
    "with_index",
    "pmap",
    "pflat_map",
    "puniq",
    "pfirst",
    "pany",
    "pmap_chunked",
    "pipeline",
    "pgrep",
    "pfor",
    "par_lines",
    "par_walk",
    "pwatch",
    "psort",
    "reduce",
    "fold",
    "preduce",
    "preduce_init",
    "pmap_reduce",
    "pcache",
    "watch",
    "fan",
    "fan_cap",
    "pchannel",
    "pselect",
    "uniq",
    "distinct",
    "uniqstr",
    "uniqint",
    "uniqnum",
    "pairs",
    "unpairs",
    "pairkeys",
    "pairvalues",
    "pairgrep",
    "pairmap",
    "pairfirst",
    "sample",
    "zip",
    "zip_shortest",
    "mesh",
    "mesh_shortest",
    "notall",
    "reductions",
    "sum",
    "sum0",
    "product",
    "min",
    "max",
    "minstr",
    "maxstr",
    "mean",
    "median",
    "mode",
    "stddev",
    "variance",
    "async",
    "spawn",
    "trace",
    "timer",
    "bench",
    "await",
    "slurp",
    "capture",
    "fetch_url",
    "fetch",
    "fetch_json",
    "fetch_async",
    "fetch_async_json",
    "par_fetch",
    "par_pipeline",
    "par_csv_read",
    "par_sed",
    "try",
    "catch",
    "finally",
    "given",
    "when",
    "default",
    "eval_timeout",
    "thread",
    "t",
];
509
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every word falls through to `Token::Ident` carrying the same text.
    fn assert_plain_idents(words: &[&str]) {
        for &word in words {
            assert_eq!(
                keyword_or_ident(word),
                Token::Ident(word.to_string()),
                "word: {}",
                word
            );
        }
    }

    #[test]
    fn keyword_or_ident_maps_string_ops() {
        assert_eq!(keyword_or_ident("eq"), Token::StrEq);
        assert_eq!(keyword_or_ident("cmp"), Token::StrCmp);
    }

    #[test]
    fn keyword_or_ident_non_keyword_is_ident() {
        assert_plain_idents(&["foo_bar"]);
    }

    #[test]
    fn keyword_or_ident_logical_words_and_repeat() {
        assert_eq!(keyword_or_ident("and"), Token::LogAndWord);
        assert_eq!(keyword_or_ident("or"), Token::LogOrWord);
        assert_eq!(keyword_or_ident("not"), Token::LogNotWord);
        assert_eq!(keyword_or_ident("x"), Token::X);
    }

    #[test]
    fn keyword_or_ident_string_comparison_words() {
        assert_eq!(keyword_or_ident("lt"), Token::StrLt);
        assert_eq!(keyword_or_ident("gt"), Token::StrGt);
        assert_eq!(keyword_or_ident("ge"), Token::StrGe);
    }

    #[test]
    fn keyword_or_ident_string_le_ne() {
        assert_eq!(keyword_or_ident("le"), Token::StrLe);
        assert_eq!(keyword_or_ident("ne"), Token::StrNe);
    }

    // The remaining tests pin down that reserved words and builtin names are NOT given
    // dedicated tokens by `keyword_or_ident`; they come back as plain identifiers.

    #[test]
    fn keyword_or_ident_control_flow_keywords() {
        assert_plain_idents(&[
            "if", "else", "elsif", "unless", "while", "until", "for", "foreach", "return",
        ]);
    }

    #[test]
    fn keyword_or_ident_declarations() {
        assert_plain_idents(&["my", "typed", "our", "local", "sub", "package"]);
    }

    #[test]
    fn keyword_or_ident_io_and_list_ops() {
        assert_plain_idents(&[
            "print",
            "say",
            "map",
            "grep",
            "sort",
            "join",
            "split",
            "list_count",
            "list_size",
            "cnt",
            "capture",
        ]);
    }

    #[test]
    fn keyword_or_ident_parallel_primitives() {
        assert_plain_idents(&[
            "pmap",
            "pmap_chunked",
            "pipeline",
            "pgrep",
            "pfor",
            "psort",
            "reduce",
            "fold",
            "preduce",
            "fan",
            "trace",
            "timer",
        ]);
    }

    #[test]
    fn keyword_or_ident_type_and_ref() {
        assert_plain_idents(&["ref", "scalar", "defined", "undef"]);
    }

    #[test]
    fn keyword_or_ident_block_hooks() {
        assert_plain_idents(&["BEGIN", "END", "INIT"]);
    }

    #[test]
    fn keyword_or_ident_plain_identifier_untouched() {
        assert_plain_idents(&["xyzzy123"]);
    }
}