Skip to main content

stryke/
token.rs

/// Token kinds handed to the parser.
///
/// Word-like operators (`x`, `eq`, `and`, …) get their own variants via
/// [`keyword_or_ident`]; every other word is carried as [`Token::Ident`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // Literals
    /// Integer literal, stored as `i64`.
    Integer(i64),
    /// Floating-point literal, stored as `f64`.
    Float(f64),
    /// Single-quoted string literal (presumably not interpolated — confirm against the lexer).
    SingleString(String),
    /// Double-quoted string literal.
    DoubleString(String),
    /// `` `...` `` or `qx{...}` — interpolated like double quotes, then executed as `sh -c` (Perl `qx`).
    BacktickString(String),
    /// Regex pattern: (pattern, flags, delimiter)
    Regex(String, String, char),
    /// Here-document. NOTE(review): the meaning of the two strings and the flag is not
    /// documented in this file — confirm against the lexer before relying on field order.
    HereDoc(String, String, bool),
    /// Word list (presumably Perl `qw(...)`), one entry per word.
    QW(Vec<String>),

    // Variables
    /// Scalar variable (presumably the name without its sigil, as for `DerefScalarVar` — confirm).
    ScalarVar(String),
    /// `$$foo` — symbolic scalar deref (inner name is `foo` without sigil).
    DerefScalarVar(String),
    /// Array variable.
    ArrayVar(String),
    /// Hash variable.
    HashVar(String),
    ArrayAt,
    HashPercent,

    // Identifiers & keywords
    /// Any word that is not one of the operator words handled by `keyword_or_ident`;
    /// this includes every entry of `KEYWORDS`.
    Ident(String),
    Label(String),
    PackageSep,
    /// `format NAME =` … body … `.` (body lines without the closing `.`)
    FormatDecl {
        name: String,
        lines: Vec<String>,
    },

    // Arithmetic
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Power,

    // String
    Dot,
    /// The word `x` (see `keyword_or_ident`).
    X,

    // Comparison (numeric)
    NumEq,
    NumNe,
    NumLt,
    NumGt,
    NumLe,
    NumGe,
    Spaceship,

    // Comparison (string)
    /// The word `eq`.
    StrEq,
    /// The word `ne`.
    StrNe,
    /// The word `lt`.
    StrLt,
    /// The word `gt`.
    StrGt,
    /// The word `le`.
    StrLe,
    /// The word `ge`.
    StrGe,
    /// The word `cmp`.
    StrCmp,

    // Logical
    LogAnd,
    LogOr,
    LogNot,
    /// The word `and`.
    LogAndWord,
    /// The word `or`.
    LogOrWord,
    /// The word `not`.
    LogNotWord,
    DefinedOr,

    // Bitwise
    BitAnd,
    BitOr,
    BitXor,
    BitNot,
    ShiftLeft,
    ShiftRight,

    // Assignment
    Assign,
    PlusAssign,
    MinusAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    PowAssign,
    DotAssign,
    AndAssign,
    OrAssign,
    XorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    /// Bitwise `&=`
    BitAndAssign,
    /// Bitwise `|=`
    BitOrAssign,
    DefinedOrAssign,

    // Increment/Decrement
    Increment,
    Decrement,

    // Regex binding
    BindMatch,
    BindNotMatch,

    // Arrows & separators
    Arrow,
    FatArrow,
    /// `|>` — pipe-forward (F#/Elixir): `x |> f(a)` desugars to `f(x, a)` at parse time.
    PipeForward,
    /// `~>` — thread-first macro: `~> EXPR stage1 stage2 ...` injects as first arg
    ThreadArrow,
    /// `~>>` / `->>` — thread-last macro: injects as last arg
    ThreadArrowLast,
    /// Two-dot range / inclusive flip-flop (`..`).
    Range,
    /// Three-dot range / exclusive flip-flop (`...`); list expansion matches `..` (Perl).
    RangeExclusive,
    Backslash,

    // Delimiters
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    /// `>{` — standalone block in thread macro (not attached to a function)
    ArrowBrace,

    // Punctuation
    Semicolon,
    Comma,
    Question,
    Colon,

    // I/O
    Diamond,
    ReadLine(String),

    // File tests
    /// File-test operator, identified by a single character.
    FileTest(char),

    // Special
    Eof,
    Newline,
}
151
152impl Token {
153    pub fn is_term_start(&self) -> bool {
154        matches!(
155            self,
156            Token::Integer(_)
157                | Token::Float(_)
158                | Token::SingleString(_)
159                | Token::DoubleString(_)
160                | Token::BacktickString(_)
161                | Token::ScalarVar(_)
162                | Token::DerefScalarVar(_)
163                | Token::ArrayVar(_)
164                | Token::HashVar(_)
165                | Token::Ident(_)
166                | Token::LParen
167                | Token::LBracket
168                | Token::LBrace
169                | Token::Backslash
170                | Token::Minus
171                | Token::LogNot
172                | Token::BitNot
173                | Token::LogNotWord
174                | Token::QW(_)
175                | Token::Regex(_, _, _)
176                | Token::FileTest(_)
177                | Token::ThreadArrow
178                | Token::ThreadArrowLast
179        )
180    }
181}
182
183/// Resolve an identifier to a keyword token or leave as Ident.
184pub fn keyword_or_ident(word: &str) -> Token {
185    match word {
186        "x" => Token::X,
187        "eq" => Token::StrEq,
188        "ne" => Token::StrNe,
189        "lt" => Token::StrLt,
190        "gt" => Token::StrGt,
191        "le" => Token::StrLe,
192        "ge" => Token::StrGe,
193        "cmp" => Token::StrCmp,
194        "and" => Token::LogAndWord,
195        "or" => Token::LogOrWord,
196        "not" => Token::LogNotWord,
197        _ => Token::Ident(word.to_string()),
198    }
199}
200
/// All Perl keyword identifiers that are NOT converted to separate token variants.
/// The parser recognizes these as `Token::Ident("keyword")`.
///
/// Every entry is listed exactly once, so consumers that iterate the slice
/// (rather than only testing membership) never see the same word twice.
pub const KEYWORDS: &[&str] = &[
    "frozen",
    "typed",
    "my",
    "mysync",
    "our",
    "oursync",
    "local",
    "sub",
    "fn",
    "struct",
    "enum",
    "class",
    "trait",
    "extends",
    "impl",
    "pub",
    "priv",
    "Self",
    "return",
    "if",
    "elsif",
    "else",
    "unless",
    "while",
    "until",
    "for",
    "foreach",
    "do",
    "last",
    "next",
    "redo",
    "use",
    "no",
    "require",
    "package",
    "bless",
    "print",
    "say",
    "die",
    "warn",
    "chomp",
    "chop",
    "push",
    "pop",
    "shift",
    "shuffle",
    "chunked",
    "windowed",
    "unshift",
    "splice",
    "split",
    "join",
    "json_decode",
    "json_encode",
    "json_jq",
    "jwt_decode",
    "jwt_decode_unsafe",
    "jwt_encode",
    "log_debug",
    "log_error",
    "log_info",
    "log_json",
    "log_level",
    "log_trace",
    "log_warn",
    "sha256",
    "sha1",
    "md5",
    "hmac_sha256",
    "hmac",
    "uuid",
    "base64_encode",
    "base64_decode",
    "hex_encode",
    "hex_decode",
    "gzip",
    "gunzip",
    "zstd",
    "zstd_decode",
    "datetime_utc",
    "datetime_from_epoch",
    "datetime_parse_rfc3339",
    "datetime_strftime",
    "toml_decode",
    "toml_encode",
    "yaml_decode",
    "yaml_encode",
    "url_encode",
    "url_decode",
    "uri_escape",
    "uri_unescape",
    "sort",
    "reverse",
    "reversed",
    "map",
    "maps",
    "flat_map",
    "flat_maps",
    "flatten",
    "compact",
    "reject",
    "grepv",
    "concat",
    "chain",
    "set",
    "list_count",
    "list_size",
    "count",
    "size",
    "cnt",
    "inject",
    "first",
    "detect",
    "find",
    "find_all",
    "match",
    "grep",
    "greps",
    "keys",
    "values",
    "each",
    "delete",
    "exists",
    "open",
    "close",
    "read",
    "write",
    "seek",
    "tell",
    "eof",
    "defined",
    "undef",
    "ref",
    "eval",
    "exec",
    "system",
    "chdir",
    "mkdir",
    "rmdir",
    "unlink",
    "rename",
    "chmod",
    "chown",
    "length",
    "substr",
    "index",
    "rindex",
    "sprintf",
    "printf",
    "lc",
    "uc",
    "lcfirst",
    "ucfirst",
    "hex",
    "oct",
    "int",
    "abs",
    "sqrt",
    "scalar",
    "wantarray",
    "caller",
    "exit",
    "pos",
    "quotemeta",
    "chr",
    "ord",
    "pack",
    "unpack",
    "vec",
    "tie",
    "untie",
    "tied",
    "dump",
    "formline",
    "lock",
    "prototype",
    "reset",
    "BEGIN",
    "END",
    "INIT",
    "CHECK",
    "UNITCHECK",
    "AUTOLOAD",
    "DESTROY",
    "all",
    "any",
    "none",
    "take_while",
    "drop_while",
    "skip_while",
    "skip",
    "first_or",
    "tap",
    "peek",
    "with_index",
    "pmap",
    "pflat_map",
    "puniq",
    "pfirst",
    "pany",
    "pmap_chunked",
    "pipeline",
    "pgrep",
    "pfor",
    "par_lines",
    "par_walk",
    "pwatch",
    "psort",
    "reduce",
    "fold",
    "preduce",
    "preduce_init",
    "pmap_reduce",
    "pcache",
    "watch",
    "fan",
    "fan_cap",
    "pchannel",
    "pselect",
    "uniq",
    "distinct",
    "uniqstr",
    "uniqint",
    "uniqnum",
    "pairs",
    "unpairs",
    "pairkeys",
    "pairvalues",
    "pairgrep",
    "pairmap",
    "pairfirst",
    "sample",
    "zip",
    "zip_shortest",
    "mesh",
    "mesh_shortest",
    "notall",
    "reductions",
    "sum",
    "sum0",
    "product",
    "min",
    "max",
    "minstr",
    "maxstr",
    "mean",
    "median",
    "mode",
    "stddev",
    "variance",
    "async",
    "spawn",
    "trace",
    "timer",
    "bench",
    "await",
    "slurp",
    "capture",
    "fetch_url",
    "fetch",
    "fetch_json",
    "fetch_async",
    "fetch_async_json",
    "par_fetch",
    "par_pipeline",
    "par_csv_read",
    "par_sed",
    "try",
    "catch",
    "finally",
    "given",
    "when",
    "default",
    "eval_timeout",
    "thread",
    "t",
];
489
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each word in `words` is returned as `Token::Ident` holding
    /// exactly that word.
    fn assert_all_ident(words: &[&str]) {
        for &word in words {
            assert_eq!(keyword_or_ident(word), Token::Ident(word.to_owned()));
        }
    }

    #[test]
    fn keyword_or_ident_maps_string_ops() {
        assert_eq!(keyword_or_ident("eq"), Token::StrEq);
        assert_eq!(keyword_or_ident("cmp"), Token::StrCmp);
    }

    #[test]
    fn keyword_or_ident_non_keyword_is_ident() {
        assert_all_ident(&["foo_bar"]);
    }

    #[test]
    fn keyword_or_ident_logical_words_and_repeat() {
        assert_eq!(keyword_or_ident("and"), Token::LogAndWord);
        assert_eq!(keyword_or_ident("or"), Token::LogOrWord);
        assert_eq!(keyword_or_ident("not"), Token::LogNotWord);
        assert_eq!(keyword_or_ident("x"), Token::X);
    }

    #[test]
    fn keyword_or_ident_string_comparison_words() {
        assert_eq!(keyword_or_ident("lt"), Token::StrLt);
        assert_eq!(keyword_or_ident("gt"), Token::StrGt);
        assert_eq!(keyword_or_ident("ge"), Token::StrGe);
    }

    #[test]
    fn keyword_or_ident_string_le_ne() {
        assert_eq!(keyword_or_ident("le"), Token::StrLe);
        assert_eq!(keyword_or_ident("ne"), Token::StrNe);
    }

    #[test]
    fn keyword_or_ident_control_flow_keywords() {
        assert_all_ident(&[
            "if", "else", "elsif", "unless", "while", "until", "for", "foreach", "return",
        ]);
    }

    #[test]
    fn keyword_or_ident_declarations() {
        assert_all_ident(&["my", "typed", "our", "local", "sub", "package"]);
    }

    #[test]
    fn keyword_or_ident_io_and_list_ops() {
        assert_all_ident(&[
            "print",
            "say",
            "map",
            "grep",
            "sort",
            "join",
            "split",
            "list_count",
            "list_size",
            "cnt",
            "capture",
        ]);
    }

    #[test]
    fn keyword_or_ident_parallel_primitives() {
        assert_all_ident(&[
            "pmap",
            "pmap_chunked",
            "pipeline",
            "pgrep",
            "pfor",
            "psort",
            "reduce",
            "fold",
            "preduce",
            "fan",
            "trace",
            "timer",
        ]);
    }

    #[test]
    fn keyword_or_ident_type_and_ref() {
        assert_all_ident(&["ref", "scalar", "defined", "undef"]);
    }

    #[test]
    fn keyword_or_ident_block_hooks() {
        assert_all_ident(&["BEGIN", "END", "INIT"]);
    }

    #[test]
    fn keyword_or_ident_plain_identifier_untouched() {
        assert_all_ident(&["xyzzy123"]);
    }
}