Skip to main content

stryke/
token.rs

/// Lexical token kinds: Perl-style literals, variables and operators, plus the
/// pipe-forward / thread-macro extensions documented on the individual variants.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // Literals
    Integer(i64),
    Float(f64),
    SingleString(String),
    DoubleString(String),
    /// `` `...` `` or `qx{...}` — interpolated like double quotes, then executed as `sh -c` (Perl `qx`).
    BacktickString(String),
    /// Regex pattern: (pattern, flags, delimiter)
    Regex(String, String, char),
    /// Here-document literal.
    /// NOTE(review): the meaning of the three fields is not documented in this
    /// file — confirm against the lexer before relying on their order.
    HereDoc(String, String, bool),
    /// Word list (presumably Perl `qw(...)` — confirm against the lexer).
    QW(Vec<String>),

    // Variables
    ScalarVar(String),
    /// `$$foo` — symbolic scalar deref (inner name is `foo` without sigil).
    DerefScalarVar(String),
    ArrayVar(String),
    HashVar(String),
    ArrayAt,
    HashPercent,

    // Identifiers & keywords
    /// Bareword identifier. Reserved words such as `if` or `my` are also
    /// delivered as `Ident` (see `keyword_or_ident`).
    Ident(String),
    Label(String),
    PackageSep,
    /// `format NAME =` … body … `.` (body lines without the closing `.`)
    FormatDecl {
        name: String,
        lines: Vec<String>,
    },

    // Arithmetic
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Power,

    // String
    Dot,
    /// The word operator `x` (string repetition; see also [`Token::XAssign`]).
    X,

    // Comparison (numeric)
    NumEq,
    NumNe,
    NumLt,
    NumGt,
    NumLe,
    NumGe,
    Spaceship,

    // Comparison (string) — the word operators `eq ne lt gt le ge cmp`
    StrEq,
    StrNe,
    StrLt,
    StrGt,
    StrLe,
    StrGe,
    StrCmp,

    // Logical
    LogAnd,
    LogOr,
    LogNot,
    /// The word operator `and`.
    LogAndWord,
    /// The word operator `or`.
    LogOrWord,
    /// The word operator `not`.
    LogNotWord,
    DefinedOr,

    // Bitwise
    BitAnd,
    BitOr,
    BitXor,
    BitNot,
    ShiftLeft,
    ShiftRight,

    // Assignment
    Assign,
    PlusAssign,
    MinusAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    PowAssign,
    DotAssign,
    /// `x=` — string-repetition compound assign (`$s x= 3`).
    XAssign,
    AndAssign,
    OrAssign,
    XorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    /// Bitwise `&=`
    BitAndAssign,
    /// Bitwise `|=`
    BitOrAssign,
    DefinedOrAssign,

    // Increment/Decrement
    Increment,
    Decrement,

    // Regex binding
    BindMatch,
    BindNotMatch,

    // Arrows & separators
    Arrow,
    FatArrow,
    /// `|>` — pipe-forward (F#/Elixir): `x |> f(a)` desugars to `f(x, a)` at parse time.
    PipeForward,
    /// `~>` — thread-first macro: `~> EXPR stage1 stage2 ...` injects as first arg
    ThreadArrow,
    /// `~>>` / `->>` — thread-last macro: injects as last arg
    ThreadArrowLast,
    /// `~s>` — streaming thread-first. Per-stage semantics match `~>`
    /// (insert threaded value as first arg / topic), but each stage runs
    /// in its own worker connected by bounded channels — items flow one
    /// at a time. Concurrent (per-item flow with backpressure), not
    /// chunk-parallel.
    ThreadArrowStream,
    /// `~s>>` — streaming thread-last. Per-stage semantics match `~>>`
    /// (insert threaded value as last arg).
    ThreadArrowStreamLast,
    /// `~p>` — parallel-chunk thread-first. Whole pipeline runs per chunk
    /// in parallel, results auto-merged at end (sugar for
    /// `par_reduce { stage1 |> stage2 |> ... } SOURCE`). `||>` or
    /// `|then|` switch from parallel-chunk back to pipe-forward / `~>`.
    ThreadArrowPar,
    /// `~p>>` — parallel-chunk thread-last counterpart of `~p>`.
    ThreadArrowParLast,
    /// `~d>` — **distributed** thread-first. Same chunk-block semantics as
    /// `~p>` (each stage operates on `@_` = chunk elements), but the chunks
    /// are shipped to remote workers on a cluster instead of local rayon
    /// threads. Syntax: `~d> on $cluster SOURCE stage1 stage2 ...`.
    /// Sugar for `dist_reduce on $cluster { stages } SOURCE`. Reuses the
    /// existing `pmap_on` dispatcher (one ssh process per slot, JOB frames
    /// flowing over a shared work queue, fault tolerance via retry).
    ThreadArrowDist,
    /// `~d>>` — distributed thread-last counterpart of `~d>` (insert threaded
    /// value as last positional arg to each named stage).
    ThreadArrowDistLast,
    /// Two-dot range / inclusive flip-flop (`..`).
    Range,
    /// Three-dot range / exclusive flip-flop (`...`); list expansion matches `..` (Perl).
    RangeExclusive,
    Backslash,

    // Delimiters
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    /// `>{` — standalone block in thread macro (not attached to a function)
    ArrowBrace,

    // Punctuation
    Semicolon,
    Comma,
    Question,
    Colon,

    // I/O
    Diamond,
    ReadLine(String),

    // File tests
    FileTest(char),

    // Special
    Eof,
    Newline,
}

impl Token {
    /// Returns `true` when this token is one of the kinds that can begin a
    /// term: numeric/string/regex/`QW` literals, variables, identifiers,
    /// opening delimiters, the prefix operators `\`, `-`, `!`, `~` and `not`,
    /// file tests, and every thread-macro arrow.
    ///
    /// The distributed arrows (`~d>` / `~d>>`) are included alongside the
    /// plain, streaming and parallel-chunk arrows: all of them are written in
    /// prefix position (`~d> on $cluster SOURCE stage1 ...`), so they open an
    /// expression exactly like `~p>` does.
    ///
    /// NOTE(review): `HereDoc`, `Diamond` and `ReadLine` are not listed here;
    /// confirm with the parser whether that is intentional.
    pub fn is_term_start(&self) -> bool {
        matches!(
            self,
            Token::Integer(_)
                | Token::Float(_)
                | Token::SingleString(_)
                | Token::DoubleString(_)
                | Token::BacktickString(_)
                | Token::ScalarVar(_)
                | Token::DerefScalarVar(_)
                | Token::ArrayVar(_)
                | Token::HashVar(_)
                | Token::Ident(_)
                | Token::LParen
                | Token::LBracket
                | Token::LBrace
                | Token::Backslash
                | Token::Minus
                | Token::LogNot
                | Token::BitNot
                | Token::LogNotWord
                | Token::QW(_)
                | Token::Regex(_, _, _)
                | Token::FileTest(_)
                | Token::ThreadArrow
                | Token::ThreadArrowLast
                | Token::ThreadArrowStream
                | Token::ThreadArrowStreamLast
                | Token::ThreadArrowPar
                | Token::ThreadArrowParLast
                | Token::ThreadArrowDist
                | Token::ThreadArrowDistLast
        )
    }
}
215
216/// Resolve an identifier to a keyword token or leave as Ident.
217pub fn keyword_or_ident(word: &str) -> Token {
218    match word {
219        "x" => Token::X,
220        "eq" => Token::StrEq,
221        "ne" => Token::StrNe,
222        "lt" => Token::StrLt,
223        "gt" => Token::StrGt,
224        "le" => Token::StrLe,
225        "ge" => Token::StrGe,
226        "cmp" => Token::StrCmp,
227        "and" => Token::LogAndWord,
228        "or" => Token::LogOrWord,
229        "not" => Token::LogNotWord,
230        _ => Token::Ident(word.to_string()),
231    }
232}
233
/// Reserved words and builtin function names that are NOT converted to
/// separate token variants; they reach the parser as `Token::Ident("keyword")`.
///
/// The list mixes Perl keywords/builtins with this language's own extensions
/// (parallel primitives, codecs, fetch helpers, ...). Each name appears
/// exactly once, so the slice can be iterated without producing repeats.
pub const KEYWORDS: &[&str] = &[
    "frozen",
    "typed",
    "my",
    "mysync",
    "our",
    "oursync",
    "local",
    "sub",
    "fn",
    "struct",
    "enum",
    "class",
    "trait",
    "extends",
    "impl",
    "pub",
    "priv",
    "Self",
    "return",
    "if",
    "elsif",
    "else",
    "unless",
    "while",
    "until",
    "for",
    "foreach",
    "do",
    "last",
    "next",
    "redo",
    "use",
    "no",
    "require",
    "package",
    "bless",
    "print",
    "say",
    "die",
    "warn",
    "chomp",
    "chop",
    "push",
    "pop",
    "shift",
    "shuffle",
    "chunked",
    "windowed",
    "unshift",
    "splice",
    "split",
    "join",
    "json_decode",
    "json_encode",
    "json_jq",
    "jwt_decode",
    "jwt_decode_unsafe",
    "jwt_encode",
    "log_debug",
    "log_error",
    "log_info",
    "log_json",
    "log_level",
    "log_trace",
    "log_warn",
    "sha256",
    "sha1",
    "md5",
    "hmac_sha256",
    "hmac",
    "uuid",
    "base64_encode",
    "base64_decode",
    "hex_encode",
    "hex_decode",
    "gzip",
    "gunzip",
    "zstd",
    "zstd_decode",
    "datetime_utc",
    "datetime_from_epoch",
    "datetime_parse_rfc3339",
    "datetime_strftime",
    "toml_decode",
    "toml_encode",
    "yaml_decode",
    "yaml_encode",
    "url_encode",
    "url_decode",
    "uri_escape",
    "uri_unescape",
    "sort",
    "reverse",
    "reversed",
    "map",
    "maps",
    "flat_map",
    "flat_maps",
    "flatten",
    "compact",
    "reject",
    "grepv",
    "concat",
    "chain",
    "set",
    "list_count",
    "list_size",
    "count",
    "size",
    "cnt",
    "inject",
    "first",
    "detect",
    "find",
    "find_all",
    "match",
    "grep",
    "greps",
    "keys",
    "values",
    "each",
    "delete",
    "exists",
    "open",
    "close",
    "read",
    "write",
    "seek",
    "tell",
    "eof",
    "defined",
    "undef",
    "ref",
    "eval",
    "exec",
    "system",
    "chdir",
    "mkdir",
    "rmdir",
    "unlink",
    "rename",
    "chmod",
    "chown",
    "length",
    "substr",
    "index",
    "rindex",
    "sprintf",
    "printf",
    "lc",
    "uc",
    "lcfirst",
    "ucfirst",
    "hex",
    "oct",
    "int",
    "abs",
    "sqrt",
    "scalar",
    "wantarray",
    "caller",
    "exit",
    "pos",
    "quotemeta",
    "chr",
    "ord",
    "pack",
    "unpack",
    "vec",
    "tie",
    "untie",
    "tied",
    "dump",
    "formline",
    "lock",
    "prototype",
    "reset",
    "BEGIN",
    "END",
    "INIT",
    "CHECK",
    "UNITCHECK",
    "AUTOLOAD",
    "DESTROY",
    "all",
    "any",
    "none",
    "take_while",
    "drop_while",
    "skip_while",
    "skip",
    "first_or",
    "tap",
    "peek",
    "with_index",
    "pmap",
    "pflat_map",
    "puniq",
    "pfirst",
    "pany",
    "pmap_chunked",
    "pipeline",
    "pgrep",
    "pfor",
    "par_lines",
    "par_walk",
    "pwatch",
    "psort",
    "reduce",
    "fold",
    "preduce",
    "preduce_init",
    "pmap_reduce",
    "pcache",
    "watch",
    "fan",
    "fan_cap",
    "pchannel",
    "pselect",
    "uniq",
    "distinct",
    "uniqstr",
    "uniqint",
    "uniqnum",
    "pairs",
    "unpairs",
    "pairkeys",
    "pairvalues",
    "pairgrep",
    "pairmap",
    "pairfirst",
    "sample",
    "zip",
    "zip_shortest",
    "mesh",
    "mesh_shortest",
    "notall",
    "reductions",
    "sum",
    "sum0",
    "product",
    "min",
    "max",
    "minstr",
    "maxstr",
    "mean",
    "median",
    "mode",
    "stddev",
    "variance",
    "async",
    "spawn",
    "trace",
    "timer",
    "bench",
    "await",
    "slurp",
    "capture",
    "fetch_url",
    "fetch",
    "fetch_json",
    "fetch_async",
    "fetch_async_json",
    "par_fetch",
    "par_pipeline",
    "par_csv_read",
    "par_sed",
    "try",
    "catch",
    "finally",
    "given",
    "when",
    "default",
    "eval_timeout",
    "thread",
    "t",
];
522
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each word is passed through unchanged as `Token::Ident`.
    fn assert_all_ident(words: &[&str]) {
        for &word in words {
            assert_eq!(
                keyword_or_ident(word),
                Token::Ident(word.to_string()),
                "`{}` should stay an identifier",
                word
            );
        }
    }

    #[test]
    fn keyword_or_ident_maps_string_ops() {
        assert_eq!(keyword_or_ident("eq"), Token::StrEq);
        assert_eq!(keyword_or_ident("cmp"), Token::StrCmp);
    }

    #[test]
    fn keyword_or_ident_non_keyword_is_ident() {
        assert_all_ident(&["foo_bar"]);
    }

    #[test]
    fn keyword_or_ident_logical_words_and_repeat() {
        assert_eq!(keyword_or_ident("and"), Token::LogAndWord);
        assert_eq!(keyword_or_ident("or"), Token::LogOrWord);
        assert_eq!(keyword_or_ident("not"), Token::LogNotWord);
        assert_eq!(keyword_or_ident("x"), Token::X);
    }

    #[test]
    fn keyword_or_ident_string_comparison_words() {
        assert_eq!(keyword_or_ident("lt"), Token::StrLt);
        assert_eq!(keyword_or_ident("gt"), Token::StrGt);
        assert_eq!(keyword_or_ident("ge"), Token::StrGe);
    }

    #[test]
    fn keyword_or_ident_string_le_ne() {
        assert_eq!(keyword_or_ident("le"), Token::StrLe);
        assert_eq!(keyword_or_ident("ne"), Token::StrNe);
    }

    #[test]
    fn keyword_or_ident_control_flow_keywords() {
        assert_all_ident(&[
            "if", "else", "elsif", "unless", "while", "until", "for", "foreach", "return",
        ]);
    }

    #[test]
    fn keyword_or_ident_declarations() {
        assert_all_ident(&["my", "typed", "our", "local", "sub", "package"]);
    }

    #[test]
    fn keyword_or_ident_io_and_list_ops() {
        assert_all_ident(&[
            "print",
            "say",
            "map",
            "grep",
            "sort",
            "join",
            "split",
            "list_count",
            "list_size",
            "cnt",
            "capture",
        ]);
    }

    #[test]
    fn keyword_or_ident_parallel_primitives() {
        assert_all_ident(&[
            "pmap",
            "pmap_chunked",
            "pipeline",
            "pgrep",
            "pfor",
            "psort",
            "reduce",
            "fold",
            "preduce",
            "fan",
            "trace",
            "timer",
        ]);
    }

    #[test]
    fn keyword_or_ident_type_and_ref() {
        assert_all_ident(&["ref", "scalar", "defined", "undef"]);
    }

    #[test]
    fn keyword_or_ident_block_hooks() {
        assert_all_ident(&["BEGIN", "END", "INIT"]);
    }

    #[test]
    fn keyword_or_ident_plain_identifier_untouched() {
        assert_all_ident(&["xyzzy123"]);
    }
}