/// A single lexical token.
///
/// Word-shaped operators (`X`, the `Str*` comparisons and the `Log*Word`
/// variants) are produced by `keyword_or_ident`; every other word becomes
/// [`Token::Ident`].
///
/// NOTE(review): the lexer that produces the remaining variants is not part of
/// this file. Any source spelling or payload meaning marked "presumably" below
/// is inferred from the variant name and should be confirmed against it.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // ---- Literals -------------------------------------------------------
    /// Integer literal, carried as an `i64`.
    Integer(i64),
    /// Floating-point literal, carried as an `f64`.
    Float(f64),
    /// String literal; presumably the single-quoted form.
    SingleString(String),
    /// String literal; presumably the double-quoted form.
    DoubleString(String),
    /// String literal; presumably the backtick (command) form.
    BacktickString(String),
    /// Regex literal with two string fields and one `char` field.
    /// Presumably pattern, flags and delimiter, in that order (TODO confirm).
    Regex(String, String, char),
    /// Here-document with two string fields and one `bool`.
    /// Presumably tag, body and an interpolation flag (TODO confirm).
    HereDoc(String, String, bool),
    /// Word list; presumably from a `qw(...)` construct.
    QW(Vec<String>),

    // ---- Variables and sigils -------------------------------------------
    /// Scalar variable; the payload is presumably the name without its sigil.
    ScalarVar(String),
    /// Dereferenced scalar variable (exact source form not visible here).
    DerefScalarVar(String),
    /// Array variable.
    ArrayVar(String),
    /// Hash variable.
    HashVar(String),
    /// Stand-alone array sigil; presumably `@`.
    ArrayAt,
    /// Stand-alone hash sigil; presumably `%`.
    HashPercent,

    // ---- Names and declarations -----------------------------------------
    /// Any word that `keyword_or_ident` does not map to an operator token.
    Ident(String),
    /// Statement label.
    Label(String),
    /// Package separator; presumably `::`.
    PackageSep,
    /// Format declaration: its name plus the collected lines.
    FormatDecl {
        name: String,
        lines: Vec<String>,
    },

    // ---- Arithmetic -----------------------------------------------------
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Power,

    // ---- String operators -----------------------------------------------
    /// Presumably the concatenation operator `.`.
    Dot,
    /// The word operator `x` (see `keyword_or_ident`).
    X,

    // ---- Numeric comparison ---------------------------------------------
    NumEq,
    NumNe,
    NumLt,
    NumGt,
    NumLe,
    NumGe,
    Spaceship,

    // ---- String comparison (word operators) -----------------------------
    /// `eq`
    StrEq,
    /// `ne`
    StrNe,
    /// `lt`
    StrLt,
    /// `gt`
    StrGt,
    /// `le`
    StrLe,
    /// `ge`
    StrGe,
    /// `cmp`
    StrCmp,

    // ---- Logical --------------------------------------------------------
    LogAnd,
    LogOr,
    LogNot,
    /// The word operator `and`.
    LogAndWord,
    /// The word operator `or`.
    LogOrWord,
    /// The word operator `not`.
    LogNotWord,
    DefinedOr,

    // ---- Bitwise --------------------------------------------------------
    BitAnd,
    BitOr,
    BitXor,
    BitNot,
    ShiftLeft,
    ShiftRight,

    // ---- Assignment -----------------------------------------------------
    // `AndAssign`/`OrAssign` are distinct from `BitAndAssign`/`BitOrAssign`;
    // presumably the former are the logical (`&&=`, `||=`) forms.
    Assign,
    PlusAssign,
    MinusAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    PowAssign,
    DotAssign,
    AndAssign,
    OrAssign,
    XorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    BitAndAssign,
    BitOrAssign,
    DefinedOrAssign,

    // ---- Increment / decrement ------------------------------------------
    Increment,
    Decrement,

    // ---- Regex binding --------------------------------------------------
    BindMatch,
    BindNotMatch,

    // ---- Arrows, ranges and reference operator --------------------------
    Arrow,
    FatArrow,
    PipeForward,
    ThreadArrow,
    Range,
    RangeExclusive,
    Backslash,

    // ---- Delimiters -----------------------------------------------------
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    ArrowBrace,

    // ---- Punctuation ----------------------------------------------------
    Semicolon,
    Comma,
    Question,
    Colon,

    // ---- Input operators ------------------------------------------------
    /// Presumably the bare `<>` operator.
    Diamond,
    /// Read-line operator; the payload is presumably the handle name.
    ReadLine(String),

    // ---- File tests -----------------------------------------------------
    /// File-test operator identified by a single `char`.
    FileTest(char),

    // ---- Stream markers -------------------------------------------------
    Eof,
    Newline,
}
149
150impl Token {
151 pub fn is_term_start(&self) -> bool {
152 matches!(
153 self,
154 Token::Integer(_)
155 | Token::Float(_)
156 | Token::SingleString(_)
157 | Token::DoubleString(_)
158 | Token::BacktickString(_)
159 | Token::ScalarVar(_)
160 | Token::DerefScalarVar(_)
161 | Token::ArrayVar(_)
162 | Token::HashVar(_)
163 | Token::Ident(_)
164 | Token::LParen
165 | Token::LBracket
166 | Token::LBrace
167 | Token::Backslash
168 | Token::Minus
169 | Token::LogNot
170 | Token::BitNot
171 | Token::LogNotWord
172 | Token::QW(_)
173 | Token::Regex(_, _, _)
174 | Token::FileTest(_)
175 )
176 }
177}
178
179pub fn keyword_or_ident(word: &str) -> Token {
181 match word {
182 "x" => Token::X,
183 "eq" => Token::StrEq,
184 "ne" => Token::StrNe,
185 "lt" => Token::StrLt,
186 "gt" => Token::StrGt,
187 "le" => Token::StrLe,
188 "ge" => Token::StrGe,
189 "cmp" => Token::StrCmp,
190 "and" => Token::LogAndWord,
191 "or" => Token::LogOrWord,
192 "not" => Token::LogNotWord,
193 _ => Token::Ident(word.to_string()),
194 }
195}
196
/// Reserved words and builtin function names known to the language.
///
/// `keyword_or_ident` does not consult this list: these words still lex as
/// `Token::Ident`. The table is presumably used by the parser or tooling
/// elsewhere (NOTE(review): consumers are not visible from this file).
///
/// Every entry appears exactly once. Section labels below are informal
/// groupings for readers only; ordering carries no meaning visible here.
pub const KEYWORDS: &[&str] = &[
    // Declarations, scoping and object model.
    "frozen",
    "typed",
    "my",
    "mysync",
    "our",
    "local",
    "sub",
    "fn",
    "struct",
    "enum",
    "class",
    "trait",
    "extends",
    "impl",
    "pub",
    "priv",
    "Self",
    // Control flow.
    "return",
    "if",
    "elsif",
    "else",
    "unless",
    "while",
    "until",
    "for",
    "foreach",
    "do",
    "last",
    "next",
    "redo",
    // Modules and packages.
    "use",
    "no",
    "require",
    "package",
    "bless",
    // Output, diagnostics and basic list/string builtins.
    "print",
    "say",
    "die",
    "warn",
    "chomp",
    "chop",
    "push",
    "pop",
    "shift",
    "shuffle",
    "chunked",
    "windowed",
    "unshift",
    "splice",
    "split",
    "join",
    // Serialization, logging, hashing and encoding helpers.
    "json_decode",
    "json_encode",
    "json_jq",
    "jwt_decode",
    "jwt_decode_unsafe",
    "jwt_encode",
    "log_debug",
    "log_error",
    "log_info",
    "log_json",
    "log_level",
    "log_trace",
    "log_warn",
    "sha256",
    "sha1",
    "md5",
    "hmac_sha256",
    "hmac",
    "uuid",
    "base64_encode",
    "base64_decode",
    "hex_encode",
    "hex_decode",
    "gzip",
    "gunzip",
    "zstd",
    "zstd_decode",
    "datetime_utc",
    "datetime_from_epoch",
    "datetime_parse_rfc3339",
    "datetime_strftime",
    "toml_decode",
    "toml_encode",
    "yaml_decode",
    "yaml_encode",
    "url_encode",
    "url_decode",
    "uri_escape",
    "uri_unescape",
    // Collection helpers.
    "sort",
    "reverse",
    "reversed",
    "map",
    "maps",
    "flat_map",
    "flat_maps",
    "flatten",
    "compact",
    "reject",
    "concat",
    "chain",
    "set",
    "list_count",
    "list_size",
    "count",
    "size",
    "cnt",
    "inject",
    "first",
    "detect",
    "find",
    "find_all",
    "match",
    "grep",
    "greps",
    "keys",
    "values",
    "each",
    "delete",
    "exists",
    // Files, processes and the filesystem.
    "open",
    "close",
    "read",
    "write",
    "seek",
    "tell",
    "eof",
    "defined",
    "undef",
    "ref",
    "eval",
    "exec",
    "system",
    "chdir",
    "mkdir",
    "rmdir",
    "unlink",
    "rename",
    "chmod",
    "chown",
    // Strings, numbers and miscellaneous builtins.
    "length",
    "substr",
    "index",
    "rindex",
    "sprintf",
    "printf",
    "lc",
    "uc",
    "lcfirst",
    "ucfirst",
    "hex",
    "oct",
    "int",
    "abs",
    "sqrt",
    "scalar",
    "wantarray",
    "caller",
    "exit",
    "pos",
    "quotemeta",
    "chr",
    "ord",
    "pack",
    "unpack",
    "vec",
    "tie",
    "untie",
    "tied",
    "dump",
    "formline",
    "lock",
    "prototype",
    "reset",
    // Special block and sub names.
    "BEGIN",
    "END",
    "INIT",
    "CHECK",
    "UNITCHECK",
    "AUTOLOAD",
    "DESTROY",
    // Predicates and iterator adaptors.
    "all",
    "any",
    "none",
    "take_while",
    "drop_while",
    "skip_while",
    "skip",
    "first_or",
    "tap",
    "peek",
    "with_index",
    // `p*` / `par_*` primitives and reductions.
    "pmap",
    "pflat_map",
    "puniq",
    "pfirst",
    "pany",
    "pmap_chunked",
    "pipeline",
    "pgrep",
    "pfor",
    "par_lines",
    "par_walk",
    "pwatch",
    "psort",
    "reduce",
    "fold",
    "preduce",
    "preduce_init",
    "pmap_reduce",
    "pcache",
    "watch",
    "fan",
    "fan_cap",
    "pchannel",
    "pselect",
    // Uniqueness, pairs, zipping and statistics.
    "uniq",
    "distinct",
    "uniqstr",
    "uniqint",
    "uniqnum",
    "pairs",
    "unpairs",
    "pairkeys",
    "pairvalues",
    "pairgrep",
    "pairmap",
    "pairfirst",
    "sample",
    "zip",
    "zip_shortest",
    "mesh",
    "mesh_shortest",
    "notall",
    "reductions",
    "sum",
    "sum0",
    "product",
    "min",
    "max",
    "minstr",
    "maxstr",
    "mean",
    "median",
    "mode",
    "stddev",
    "variance",
    // Async, timing and fetching.
    "async",
    "spawn",
    "trace",
    "timer",
    "bench",
    "await",
    "slurp",
    "capture",
    "fetch_url",
    "fetch",
    "fetch_json",
    "fetch_async",
    "fetch_async_json",
    "par_fetch",
    "par_pipeline",
    "par_csv_read",
    "par_sed",
    // Exception handling and switch-style constructs.
    "try",
    "catch",
    "finally",
    "given",
    "when",
    "default",
    "eval_timeout",
    "thread",
    "t",
];
483
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each word in `words` comes back from `keyword_or_ident`
    /// as a `Token::Ident` carrying exactly that word.
    fn assert_all_idents(words: &[&str]) {
        for &word in words {
            assert_eq!(
                keyword_or_ident(word),
                Token::Ident(word.to_string()),
                "expected `{}` to be classified as an identifier",
                word
            );
        }
    }

    #[test]
    fn keyword_or_ident_maps_string_ops() {
        assert_eq!(keyword_or_ident("eq"), Token::StrEq);
        assert_eq!(keyword_or_ident("cmp"), Token::StrCmp);
    }

    #[test]
    fn keyword_or_ident_non_keyword_is_ident() {
        assert_all_idents(&["foo_bar"]);
    }

    #[test]
    fn keyword_or_ident_logical_words_and_repeat() {
        assert_eq!(keyword_or_ident("and"), Token::LogAndWord);
        assert_eq!(keyword_or_ident("or"), Token::LogOrWord);
        assert_eq!(keyword_or_ident("not"), Token::LogNotWord);
        assert_eq!(keyword_or_ident("x"), Token::X);
    }

    #[test]
    fn keyword_or_ident_string_comparison_words() {
        assert_eq!(keyword_or_ident("lt"), Token::StrLt);
        assert_eq!(keyword_or_ident("gt"), Token::StrGt);
        assert_eq!(keyword_or_ident("ge"), Token::StrGe);
    }

    #[test]
    fn keyword_or_ident_string_le_ne() {
        assert_eq!(keyword_or_ident("le"), Token::StrLe);
        assert_eq!(keyword_or_ident("ne"), Token::StrNe);
    }

    // The remaining tests pin down that language keywords and builtin names
    // are NOT given dedicated tokens here: they stay plain identifiers.

    #[test]
    fn keyword_or_ident_control_flow_keywords() {
        assert_all_idents(&[
            "if", "else", "elsif", "unless", "while", "until", "for", "foreach", "return",
        ]);
    }

    #[test]
    fn keyword_or_ident_declarations() {
        assert_all_idents(&["my", "typed", "our", "local", "sub", "package"]);
    }

    #[test]
    fn keyword_or_ident_io_and_list_ops() {
        assert_all_idents(&[
            "print",
            "say",
            "map",
            "grep",
            "sort",
            "join",
            "split",
            "list_count",
            "list_size",
            "cnt",
            "capture",
        ]);
    }

    #[test]
    fn keyword_or_ident_parallel_primitives() {
        assert_all_idents(&[
            "pmap",
            "pmap_chunked",
            "pipeline",
            "pgrep",
            "pfor",
            "psort",
            "reduce",
            "fold",
            "preduce",
            "fan",
            "trace",
            "timer",
        ]);
    }

    #[test]
    fn keyword_or_ident_type_and_ref() {
        assert_all_idents(&["ref", "scalar", "defined", "undef"]);
    }

    #[test]
    fn keyword_or_ident_block_hooks() {
        assert_all_idents(&["BEGIN", "END", "INIT"]);
    }

    #[test]
    fn keyword_or_ident_plain_identifier_untouched() {
        assert_all_idents(&["xyzzy123"]);
    }
}