// stryke/token.rs

/// A single lexical token.
///
/// Variants are grouped by category below. `Eq` cannot be derived for this type because
/// [`Token::Float`] carries an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // Literals
    Integer(i64),
    Float(f64),
    SingleString(String),
    DoubleString(String),
    /// `` `...` `` or `qx{...}` — interpolated like double quotes, then executed as `sh -c` (Perl `qx`).
    BacktickString(String),
    /// Regex pattern: (pattern, flags, delimiter)
    Regex(String, String, char),
    // NOTE(review): the meaning of the three `HereDoc` fields is not documented at this
    // definition — confirm against the lexer before relying on them.
    HereDoc(String, String, bool),
    QW(Vec<String>),

    // Variables
    ScalarVar(String),
    /// `$$foo` — symbolic scalar deref (inner name is `foo` without sigil).
    DerefScalarVar(String),
    ArrayVar(String),
    HashVar(String),
    ArrayAt,
    HashPercent,

    // Identifiers & keywords
    /// Any bareword that [`keyword_or_ident`] does not map to a dedicated operator token.
    Ident(String),
    Label(String),
    PackageSep,
    /// `format NAME =` … body … `.` (body lines without the closing `.`)
    FormatDecl {
        name: String,
        lines: Vec<String>,
    },

    // Arithmetic
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Power,

    // String
    Dot,
    /// The word operator `x`.
    X,

    // Comparison (numeric)
    NumEq,
    NumNe,
    NumLt,
    NumGt,
    NumLe,
    NumGe,
    Spaceship,

    // Comparison (string)
    /// The word operator `eq`.
    StrEq,
    /// The word operator `ne`.
    StrNe,
    /// The word operator `lt`.
    StrLt,
    /// The word operator `gt`.
    StrGt,
    /// The word operator `le`.
    StrLe,
    /// The word operator `ge`.
    StrGe,
    /// The word operator `cmp`.
    StrCmp,

    // Logical
    LogAnd,
    LogOr,
    LogNot,
    /// The word operator `and`.
    LogAndWord,
    /// The word operator `or`.
    LogOrWord,
    /// The word operator `not`.
    LogNotWord,
    DefinedOr,

    // Bitwise
    BitAnd,
    BitOr,
    BitXor,
    BitNot,
    ShiftLeft,
    ShiftRight,

    // Assignment
    Assign,
    PlusAssign,
    MinusAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    PowAssign,
    DotAssign,
    AndAssign,
    OrAssign,
    XorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    /// Bitwise `&=`
    BitAndAssign,
    /// Bitwise `|=`
    BitOrAssign,
    DefinedOrAssign,

    // Increment/Decrement
    Increment,
    Decrement,

    // Regex binding
    BindMatch,
    BindNotMatch,

    // Arrows & separators
    Arrow,
    FatArrow,
    /// `|>` — pipe-forward (F#/Elixir): `x |> f(a)` desugars to `f(x, a)` at parse time.
    PipeForward,
    /// `~>` — thread-first macro: `~> EXPR stage1 stage2 ...` injects as first arg
    ThreadArrow,
    /// `~>>` / `->>` — thread-last macro: injects as last arg
    ThreadArrowLast,
    /// Two-dot range / inclusive flip-flop (`..`).
    Range,
    /// Three-dot range / exclusive flip-flop (`...`); list expansion matches `..` (Perl).
    RangeExclusive,
    Backslash,

    // Delimiters
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    /// `>{` — standalone block in thread macro (not attached to a function)
    ArrowBrace,

    // Punctuation
    Semicolon,
    Comma,
    Question,
    Colon,

    // I/O
    Diamond,
    ReadLine(String),

    // File tests
    FileTest(char),

    // Special
    Eof,
    Newline,
}

impl Token {
    /// Reports whether this token is one of the kinds that may begin a term.
    ///
    /// Returns `true` for the literal, variable, identifier, opening-delimiter and operator
    /// tokens enumerated in the body, and `false` for every other variant — including
    /// `HereDoc`, `ArrayAt`, `HashPercent`, `Plus`, `Increment`, `Decrement`, `Diamond`
    /// and `ReadLine`, which are deliberately left exactly as the original list had them.
    pub fn is_term_start(&self) -> bool {
        matches!(
            self,
            // Literals
            Token::Integer(_)
                | Token::Float(_)
                | Token::SingleString(_)
                | Token::DoubleString(_)
                | Token::BacktickString(_)
                | Token::QW(_)
                | Token::Regex(_, _, _)
                // Variables and barewords
                | Token::ScalarVar(_)
                | Token::DerefScalarVar(_)
                | Token::ArrayVar(_)
                | Token::HashVar(_)
                | Token::Ident(_)
                // Opening delimiters
                | Token::LParen
                | Token::LBracket
                | Token::LBrace
                // Operator tokens accepted at the start of a term
                | Token::Backslash
                | Token::Minus
                | Token::LogNot
                | Token::BitNot
                | Token::LogNotWord
                | Token::FileTest(_)
        )
    }
}

181/// Resolve an identifier to a keyword token or leave as Ident.
182pub fn keyword_or_ident(word: &str) -> Token {
183    match word {
184        "x" => Token::X,
185        "eq" => Token::StrEq,
186        "ne" => Token::StrNe,
187        "lt" => Token::StrLt,
188        "gt" => Token::StrGt,
189        "le" => Token::StrLe,
190        "ge" => Token::StrGe,
191        "cmp" => Token::StrCmp,
192        "and" => Token::LogAndWord,
193        "or" => Token::LogOrWord,
194        "not" => Token::LogNotWord,
195        _ => Token::Ident(word.to_string()),
196    }
197}
198
/// All Perl keyword identifiers that are NOT converted to separate token variants.
/// The parser recognizes these as `Token::Ident("keyword")`.
///
/// Each word is listed exactly once. The word operators that have their own token
/// variants (`x`, `eq`, `ne`, `lt`, `gt`, `le`, `ge`, `cmp`, `and`, `or`, `not`) are
/// intentionally absent.
pub const KEYWORDS: &[&str] = &[
    "frozen",
    "typed",
    "my",
    "mysync",
    "our",
    "local",
    "sub",
    "fn",
    "struct",
    "enum",
    "class",
    "trait",
    "extends",
    "impl",
    "pub",
    "priv",
    "Self",
    "return",
    "if",
    "elsif",
    "else",
    "unless",
    "while",
    "until",
    "for",
    "foreach",
    "do",
    "last",
    "next",
    "redo",
    "use",
    "no",
    "require",
    "package",
    "bless",
    "print",
    "say",
    "die",
    "warn",
    "chomp",
    "chop",
    "push",
    "pop",
    "shift",
    "shuffle",
    "chunked",
    "windowed",
    "unshift",
    "splice",
    "split",
    "join",
    "json_decode",
    "json_encode",
    "json_jq",
    "jwt_decode",
    "jwt_decode_unsafe",
    "jwt_encode",
    "log_debug",
    "log_error",
    "log_info",
    "log_json",
    "log_level",
    "log_trace",
    "log_warn",
    "sha256",
    "sha1",
    "md5",
    "hmac_sha256",
    "hmac",
    "uuid",
    "base64_encode",
    "base64_decode",
    "hex_encode",
    "hex_decode",
    "gzip",
    "gunzip",
    "zstd",
    "zstd_decode",
    "datetime_utc",
    "datetime_from_epoch",
    "datetime_parse_rfc3339",
    "datetime_strftime",
    "toml_decode",
    "toml_encode",
    "yaml_decode",
    "yaml_encode",
    "url_encode",
    "url_decode",
    "uri_escape",
    "uri_unescape",
    "sort",
    "reverse",
    "reversed",
    "map",
    "maps",
    "flat_map",
    "flat_maps",
    "flatten",
    "compact",
    "reject",
    "concat",
    "chain",
    "set",
    "list_count",
    "list_size",
    "count",
    "size",
    "cnt",
    "inject",
    "first",
    "detect",
    "find",
    "find_all",
    "match",
    "grep",
    "greps",
    "keys",
    "values",
    "each",
    "delete",
    "exists",
    "open",
    "close",
    "read",
    "write",
    "seek",
    "tell",
    "eof",
    "defined",
    "undef",
    "ref",
    "eval",
    "exec",
    "system",
    "chdir",
    "mkdir",
    "rmdir",
    "unlink",
    "rename",
    "chmod",
    "chown",
    "length",
    "substr",
    "index",
    "rindex",
    "sprintf",
    "printf",
    "lc",
    "uc",
    "lcfirst",
    "ucfirst",
    "hex",
    "oct",
    "int",
    "abs",
    "sqrt",
    "scalar",
    "wantarray",
    "caller",
    "exit",
    "pos",
    "quotemeta",
    "chr",
    "ord",
    "pack",
    "unpack",
    "vec",
    "tie",
    "untie",
    "tied",
    "dump",
    "formline",
    "lock",
    "prototype",
    "reset",
    "BEGIN",
    "END",
    "INIT",
    "CHECK",
    "UNITCHECK",
    "AUTOLOAD",
    "DESTROY",
    "all",
    "any",
    "none",
    "take_while",
    "drop_while",
    "skip_while",
    "skip",
    "first_or",
    "tap",
    "peek",
    "with_index",
    "pmap",
    "pflat_map",
    "puniq",
    "pfirst",
    "pany",
    "pmap_chunked",
    "pipeline",
    "pgrep",
    "pfor",
    "par_lines",
    "par_walk",
    "pwatch",
    "psort",
    "reduce",
    "fold",
    "preduce",
    "preduce_init",
    "pmap_reduce",
    "pcache",
    "watch",
    "fan",
    "fan_cap",
    "pchannel",
    "pselect",
    "uniq",
    "distinct",
    "uniqstr",
    "uniqint",
    "uniqnum",
    "pairs",
    "unpairs",
    "pairkeys",
    "pairvalues",
    "pairgrep",
    "pairmap",
    "pairfirst",
    "sample",
    "zip",
    "zip_shortest",
    "mesh",
    "mesh_shortest",
    "notall",
    "reductions",
    "sum",
    "sum0",
    "product",
    "min",
    "max",
    "minstr",
    "maxstr",
    "mean",
    "median",
    "mode",
    "stddev",
    "variance",
    "async",
    "spawn",
    "trace",
    "timer",
    "bench",
    "await",
    "slurp",
    "capture",
    "fetch_url",
    "fetch",
    "fetch_json",
    "fetch_async",
    "fetch_async_json",
    "par_fetch",
    "par_pipeline",
    "par_csv_read",
    "par_sed",
    "try",
    "catch",
    "finally",
    "given",
    "when",
    "default",
    "eval_timeout",
    "thread",
    "t",
];

#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each word passes through `keyword_or_ident` unchanged, i.e. comes back
    /// as `Token::Ident` carrying the same text.
    fn assert_plain_idents(words: &[&str]) {
        for &word in words {
            assert_eq!(
                keyword_or_ident(word),
                Token::Ident(word.to_string()),
                "`{}` should stay an identifier",
                word
            );
        }
    }

    #[test]
    fn keyword_or_ident_maps_string_ops() {
        assert_eq!(keyword_or_ident("eq"), Token::StrEq);
        assert_eq!(keyword_or_ident("cmp"), Token::StrCmp);
    }

    #[test]
    fn keyword_or_ident_non_keyword_is_ident() {
        assert_plain_idents(&["foo_bar"]);
    }

    #[test]
    fn keyword_or_ident_logical_words_and_repeat() {
        assert_eq!(keyword_or_ident("and"), Token::LogAndWord);
        assert_eq!(keyword_or_ident("or"), Token::LogOrWord);
        assert_eq!(keyword_or_ident("not"), Token::LogNotWord);
        assert_eq!(keyword_or_ident("x"), Token::X);
    }

    #[test]
    fn keyword_or_ident_string_comparison_words() {
        assert_eq!(keyword_or_ident("lt"), Token::StrLt);
        assert_eq!(keyword_or_ident("gt"), Token::StrGt);
        assert_eq!(keyword_or_ident("ge"), Token::StrGe);
    }

    #[test]
    fn keyword_or_ident_string_le_ne() {
        assert_eq!(keyword_or_ident("le"), Token::StrLe);
        assert_eq!(keyword_or_ident("ne"), Token::StrNe);
    }

    #[test]
    fn keyword_or_ident_control_flow_keywords() {
        assert_plain_idents(&[
            "if", "else", "elsif", "unless", "while", "until", "for", "foreach", "return",
        ]);
    }

    #[test]
    fn keyword_or_ident_declarations() {
        assert_plain_idents(&["my", "typed", "our", "local", "sub", "package"]);
    }

    #[test]
    fn keyword_or_ident_io_and_list_ops() {
        assert_plain_idents(&[
            "print",
            "say",
            "map",
            "grep",
            "sort",
            "join",
            "split",
            "list_count",
            "list_size",
            "cnt",
            "capture",
        ]);
    }

    #[test]
    fn keyword_or_ident_parallel_primitives() {
        assert_plain_idents(&[
            "pmap",
            "pmap_chunked",
            "pipeline",
            "pgrep",
            "pfor",
            "psort",
            "reduce",
            "fold",
            "preduce",
            "fan",
            "trace",
            "timer",
        ]);
    }

    #[test]
    fn keyword_or_ident_type_and_ref() {
        assert_plain_idents(&["ref", "scalar", "defined", "undef"]);
    }

    #[test]
    fn keyword_or_ident_block_hooks() {
        assert_plain_idents(&["BEGIN", "END", "INIT"]);
    }

    #[test]
    fn keyword_or_ident_plain_identifier_untouched() {
        assert_plain_idents(&["xyzzy123"]);
    }
}