//! `stryke/token.rs` — the `Token` enum, the `keyword_or_ident` word resolver, and the
//! `KEYWORDS` table.

/// A single lexical token.
///
/// Variants with a payload carry the text or value attached to the token; unit variants stand
/// for fixed operators and punctuation. Word-spelled operators (`x`, `eq`, `and`, …) are mapped
/// onto their variants by [`keyword_or_ident`]. Symbolic spellings are only stated below where
/// the definition itself records them.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    // ── Literals ────────────────────────────────────────────────────────────
    /// Integer literal, stored as `i64`.
    Integer(i64),
    /// Floating-point literal, stored as `f64`.
    Float(f64),
    /// String literal tagged as single-quoted.
    SingleString(String),
    /// String literal tagged as double-quoted.
    DoubleString(String),
    /// `` `...` `` or `qx{...}` — interpolated like double quotes, then run through `sh -c`
    /// (Perl `qx`).
    BacktickString(String),
    /// Regex literal as a `(pattern, flags, delimiter)` triple.
    Regex(String, String, char),
    /// Here-document.
    ///
    /// NOTE(review): the three positional fields are not described in this file — presumably
    /// (tag, body, interpolate); confirm against the lexer.
    HereDoc(String, String, bool),
    /// List of words (presumably from a `qw` literal — confirm against the lexer).
    QW(Vec<String>),

    // ── Variables ───────────────────────────────────────────────────────────
    /// Scalar variable.
    ScalarVar(String),
    /// `$$foo` — symbolic scalar dereference; the payload is the inner name `foo`, no sigil.
    DerefScalarVar(String),
    /// Array variable.
    ArrayVar(String),
    /// Hash variable.
    HashVar(String),
    ArrayAt,
    HashPercent,

    // ── Identifiers & keywords ──────────────────────────────────────────────
    /// Any bare word that [`keyword_or_ident`] does not turn into an operator token; this
    /// includes every entry of [`KEYWORDS`].
    Ident(String),
    Label(String),
    PackageSep,
    /// `format NAME =` … body … `.`
    FormatDecl {
        /// The `NAME` of the format.
        name: String,
        /// Body lines, without the closing `.` line.
        lines: Vec<String>,
    },

    // ── Arithmetic ──────────────────────────────────────────────────────────
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Power,

    // ── String operators ────────────────────────────────────────────────────
    Dot,
    /// Word operator `x`.
    X,

    // ── Numeric comparison ──────────────────────────────────────────────────
    NumEq,
    NumNe,
    NumLt,
    NumGt,
    NumLe,
    NumGe,
    Spaceship,

    // ── String comparison ───────────────────────────────────────────────────
    /// Word operator `eq`.
    StrEq,
    /// Word operator `ne`.
    StrNe,
    /// Word operator `lt`.
    StrLt,
    /// Word operator `gt`.
    StrGt,
    /// Word operator `le`.
    StrLe,
    /// Word operator `ge`.
    StrGe,
    /// Word operator `cmp`.
    StrCmp,

    // ── Logical ─────────────────────────────────────────────────────────────
    LogAnd,
    LogOr,
    LogNot,
    /// Word operator `and`.
    LogAndWord,
    /// Word operator `or`.
    LogOrWord,
    /// Word operator `not`.
    LogNotWord,
    DefinedOr,

    // ── Bitwise ─────────────────────────────────────────────────────────────
    BitAnd,
    BitOr,
    BitXor,
    BitNot,
    ShiftLeft,
    ShiftRight,

    // ── Assignment ──────────────────────────────────────────────────────────
    Assign,
    PlusAssign,
    MinusAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    PowAssign,
    DotAssign,
    /// NOTE(review): presumably the logical `&&=`, since `BitAndAssign` covers `&=` — confirm.
    AndAssign,
    /// NOTE(review): presumably the logical `||=`, since `BitOrAssign` covers `|=` — confirm.
    OrAssign,
    XorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    /// Bitwise `&=`.
    BitAndAssign,
    /// Bitwise `|=`.
    BitOrAssign,
    DefinedOrAssign,

    // ── Increment / decrement ───────────────────────────────────────────────
    Increment,
    Decrement,

    // ── Regex binding ───────────────────────────────────────────────────────
    BindMatch,
    BindNotMatch,

    // ── Arrows & separators ─────────────────────────────────────────────────
    Arrow,
    FatArrow,
    /// `|>` — pipe-forward (F#/Elixir style): `x |> f(a)` is desugared to `f(x, a)` at parse
    /// time.
    PipeForward,
    /// `~>` or `->>` — thread-macro alias: `~> EXPR stage1 stage2 ...` is the same as
    /// `t EXPR stage1 stage2 ...`.
    ThreadArrow,
    /// `..` — two-dot range / inclusive flip-flop.
    Range,
    /// `...` — three-dot range / exclusive flip-flop; list expansion matches `..` (Perl).
    RangeExclusive,
    Backslash,

    // ── Delimiters ──────────────────────────────────────────────────────────
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    /// `>{` — standalone block inside a thread macro (not attached to a function).
    ArrowBrace,

    // ── Punctuation ─────────────────────────────────────────────────────────
    Semicolon,
    Comma,
    Question,
    Colon,

    // ── I/O ─────────────────────────────────────────────────────────────────
    Diamond,
    ReadLine(String),

    // ── File tests ──────────────────────────────────────────────────────────
    /// File-test operator; the payload is a single `char`.
    FileTest(char),

    // ── Special ─────────────────────────────────────────────────────────────
    Eof,
    Newline,
}
149
150impl Token {
151    pub fn is_term_start(&self) -> bool {
152        matches!(
153            self,
154            Token::Integer(_)
155                | Token::Float(_)
156                | Token::SingleString(_)
157                | Token::DoubleString(_)
158                | Token::BacktickString(_)
159                | Token::ScalarVar(_)
160                | Token::DerefScalarVar(_)
161                | Token::ArrayVar(_)
162                | Token::HashVar(_)
163                | Token::Ident(_)
164                | Token::LParen
165                | Token::LBracket
166                | Token::LBrace
167                | Token::Backslash
168                | Token::Minus
169                | Token::LogNot
170                | Token::BitNot
171                | Token::LogNotWord
172                | Token::QW(_)
173                | Token::Regex(_, _, _)
174                | Token::FileTest(_)
175        )
176    }
177}
178
179/// Resolve an identifier to a keyword token or leave as Ident.
180pub fn keyword_or_ident(word: &str) -> Token {
181    match word {
182        "x" => Token::X,
183        "eq" => Token::StrEq,
184        "ne" => Token::StrNe,
185        "lt" => Token::StrLt,
186        "gt" => Token::StrGt,
187        "le" => Token::StrLe,
188        "ge" => Token::StrGe,
189        "cmp" => Token::StrCmp,
190        "and" => Token::LogAndWord,
191        "or" => Token::LogOrWord,
192        "not" => Token::LogNotWord,
193        _ => Token::Ident(word.to_string()),
194    }
195}
196
/// All Perl keyword identifiers that are NOT converted to separate token variants.
/// The parser recognizes these as `Token::Ident("keyword")`.
///
/// Each word is listed exactly once, so iterating the table never yields a repeated entry.
/// The order is otherwise not significant for membership checks.
pub const KEYWORDS: &[&str] = &[
    "frozen",
    "typed",
    "my",
    "mysync",
    "our",
    "local",
    "sub",
    "fn",
    "struct",
    "enum",
    "class",
    "trait",
    "extends",
    "impl",
    "pub",
    "priv",
    "Self",
    "return",
    "if",
    "elsif",
    "else",
    "unless",
    "while",
    "until",
    "for",
    "foreach",
    "do",
    "last",
    "next",
    "redo",
    "use",
    "no",
    "require",
    "package",
    "bless",
    "print",
    "say",
    "die",
    "warn",
    "chomp",
    "chop",
    "push",
    "pop",
    "shift",
    "shuffle",
    "chunked",
    "windowed",
    "unshift",
    "splice",
    "split",
    "join",
    "json_decode",
    "json_encode",
    "json_jq",
    "jwt_decode",
    "jwt_decode_unsafe",
    "jwt_encode",
    "log_debug",
    "log_error",
    "log_info",
    "log_json",
    "log_level",
    "log_trace",
    "log_warn",
    "sha256",
    "sha1",
    "md5",
    "hmac_sha256",
    "hmac",
    "uuid",
    "base64_encode",
    "base64_decode",
    "hex_encode",
    "hex_decode",
    "gzip",
    "gunzip",
    "zstd",
    "zstd_decode",
    "datetime_utc",
    "datetime_from_epoch",
    "datetime_parse_rfc3339",
    "datetime_strftime",
    "toml_decode",
    "toml_encode",
    "yaml_decode",
    "yaml_encode",
    "url_encode",
    "url_decode",
    "uri_escape",
    "uri_unescape",
    "sort",
    "reverse",
    "reversed",
    "map",
    "maps",
    "flat_map",
    "flat_maps",
    "flatten",
    "compact",
    "reject",
    "concat",
    "chain",
    "set",
    "list_count",
    "list_size",
    "count",
    "size",
    "cnt",
    "inject",
    "first",
    "detect",
    "find",
    "find_all",
    "match",
    "grep",
    "greps",
    "keys",
    "values",
    "each",
    "delete",
    "exists",
    "open",
    "close",
    "read",
    "write",
    "seek",
    "tell",
    "eof",
    "defined",
    "undef",
    "ref",
    "eval",
    "exec",
    "system",
    "chdir",
    "mkdir",
    "rmdir",
    "unlink",
    "rename",
    "chmod",
    "chown",
    "length",
    "substr",
    "index",
    "rindex",
    "sprintf",
    "printf",
    "lc",
    "uc",
    "lcfirst",
    "ucfirst",
    "hex",
    "oct",
    "int",
    "abs",
    "sqrt",
    "scalar",
    "wantarray",
    "caller",
    "exit",
    "pos",
    "quotemeta",
    "chr",
    "ord",
    "pack",
    "unpack",
    "vec",
    "tie",
    "untie",
    "tied",
    "dump",
    "formline",
    "lock",
    "prototype",
    "reset",
    "BEGIN",
    "END",
    "INIT",
    "CHECK",
    "UNITCHECK",
    "AUTOLOAD",
    "DESTROY",
    "all",
    "any",
    "none",
    "take_while",
    "drop_while",
    "skip_while",
    "skip",
    "first_or",
    "tap",
    "peek",
    "with_index",
    "pmap",
    "pflat_map",
    "puniq",
    "pfirst",
    "pany",
    "pmap_chunked",
    "pipeline",
    "pgrep",
    "pfor",
    "par_lines",
    "par_walk",
    "pwatch",
    "psort",
    "reduce",
    "fold",
    "preduce",
    "preduce_init",
    "pmap_reduce",
    "pcache",
    "watch",
    "fan",
    "fan_cap",
    "pchannel",
    "pselect",
    "uniq",
    "distinct",
    "uniqstr",
    "uniqint",
    "uniqnum",
    "pairs",
    "unpairs",
    "pairkeys",
    "pairvalues",
    "pairgrep",
    "pairmap",
    "pairfirst",
    "sample",
    "zip",
    "zip_shortest",
    "mesh",
    "mesh_shortest",
    "notall",
    "reductions",
    "sum",
    "sum0",
    "product",
    "min",
    "max",
    "minstr",
    "maxstr",
    "mean",
    "median",
    "mode",
    "stddev",
    "variance",
    "async",
    "spawn",
    "trace",
    "timer",
    "bench",
    "await",
    "slurp",
    "capture",
    "fetch_url",
    "fetch",
    "fetch_json",
    "fetch_async",
    "fetch_async_json",
    "par_fetch",
    "par_pipeline",
    "par_csv_read",
    "par_sed",
    "try",
    "catch",
    "finally",
    "given",
    "when",
    "default",
    "eval_timeout",
    "thread",
    "t",
];
483
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that `word` comes back as `Token::Ident` carrying exactly the same text.
    fn assert_ident(word: &str) {
        assert_eq!(keyword_or_ident(word), Token::Ident(word.to_string()));
    }

    /// Applies `assert_ident` to every entry of `words`.
    fn assert_all_idents(words: &[&str]) {
        for word in words {
            assert_ident(word);
        }
    }

    #[test]
    fn keyword_or_ident_maps_string_ops() {
        assert_eq!(keyword_or_ident("eq"), Token::StrEq);
        assert_eq!(keyword_or_ident("cmp"), Token::StrCmp);
    }

    #[test]
    fn keyword_or_ident_non_keyword_is_ident() {
        assert_ident("foo_bar");
    }

    #[test]
    fn keyword_or_ident_logical_words_and_repeat() {
        assert_eq!(keyword_or_ident("and"), Token::LogAndWord);
        assert_eq!(keyword_or_ident("or"), Token::LogOrWord);
        assert_eq!(keyword_or_ident("not"), Token::LogNotWord);
        assert_eq!(keyword_or_ident("x"), Token::X);
    }

    #[test]
    fn keyword_or_ident_string_comparison_words() {
        assert_eq!(keyword_or_ident("lt"), Token::StrLt);
        assert_eq!(keyword_or_ident("gt"), Token::StrGt);
        assert_eq!(keyword_or_ident("ge"), Token::StrGe);
    }

    #[test]
    fn keyword_or_ident_string_le_ne() {
        assert_eq!(keyword_or_ident("le"), Token::StrLe);
        assert_eq!(keyword_or_ident("ne"), Token::StrNe);
    }

    // The remaining tests pin down that language keywords stay plain identifiers.

    #[test]
    fn keyword_or_ident_control_flow_keywords() {
        assert_all_idents(&[
            "if", "else", "elsif", "unless", "while", "until", "for", "foreach", "return",
        ]);
    }

    #[test]
    fn keyword_or_ident_declarations() {
        assert_all_idents(&["my", "typed", "our", "local", "sub", "package"]);
    }

    #[test]
    fn keyword_or_ident_io_and_list_ops() {
        assert_all_idents(&[
            "print",
            "say",
            "map",
            "grep",
            "sort",
            "join",
            "split",
            "list_count",
            "list_size",
            "cnt",
            "capture",
        ]);
    }

    #[test]
    fn keyword_or_ident_parallel_primitives() {
        assert_all_idents(&[
            "pmap",
            "pmap_chunked",
            "pipeline",
            "pgrep",
            "pfor",
            "psort",
            "reduce",
            "fold",
            "preduce",
            "fan",
            "trace",
            "timer",
        ]);
    }

    #[test]
    fn keyword_or_ident_type_and_ref() {
        assert_all_idents(&["ref", "scalar", "defined", "undef"]);
    }

    #[test]
    fn keyword_or_ident_block_hooks() {
        assert_all_idents(&["BEGIN", "END", "INIT"]);
    }

    #[test]
    fn keyword_or_ident_plain_identifier_untouched() {
        assert_ident("xyzzy123");
    }
}