Skip to main content

stryke/
token.rs

/// One lexical token.
///
/// Variants are grouped by role: literals, variables, identifiers, operator
/// families, delimiters, punctuation, I/O, file tests and stream markers.
/// Word-shaped operators (`x`, `eq`, `and`, …) are produced by
/// [`keyword_or_ident`]; every other word stays a [`Token::Ident`].
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // ── Literals ────────────────────────────────────────────────────────
    /// Integer literal, already converted to its numeric value.
    Integer(i64),
    /// Floating-point literal, already converted to its numeric value.
    Float(f64),
    /// Single-quoted string literal.
    SingleString(String),
    /// Double-quoted string literal.
    DoubleString(String),
    /// `` `...` `` or `qx{...}` — interpolated like a double-quoted string,
    /// then run through `sh -c` (Perl `qx`).
    BacktickString(String),
    /// Regex literal as `(pattern, flags, delimiter)`.
    Regex(String, String, char),
    /// Here-document literal.
    ///
    /// NOTE(review): the meaning of the three payload fields is not stated in
    /// this file — confirm against the lexer before relying on their order.
    HereDoc(String, String, bool),
    /// `qw` word list, one entry per word.
    QW(Vec<String>),

    // ── Variables ───────────────────────────────────────────────────────
    /// Scalar variable; the payload is the variable name.
    ScalarVar(String),
    /// `$$foo` — symbolic scalar dereference (payload is `foo`, no sigil).
    DerefScalarVar(String),
    /// Array variable; the payload is the variable name.
    ArrayVar(String),
    /// Hash variable; the payload is the variable name.
    HashVar(String),
    /// Array-sigil token with no name attached (presumably a bare `@` — verify
    /// against the lexer).
    ArrayAt,
    /// Hash-sigil token with no name attached (presumably a bare `%` in sigil
    /// position, as opposed to [`Token::Percent`] — verify against the lexer).
    HashPercent,

    // ── Identifiers & keywords ──────────────────────────────────────────
    /// Bare word: either a user identifier or a keyword listed in [`KEYWORDS`].
    Ident(String),
    /// Statement label; the payload is the label name.
    Label(String),
    /// Package separator (presumably `::`).
    PackageSep,
    /// `format NAME =` … body … `.` — `lines` holds the body lines and does
    /// not include the terminating `.` line.
    FormatDecl {
        name: String,
        lines: Vec<String>,
    },

    // ── Arithmetic ──────────────────────────────────────────────────────
    /// Addition operator `+`.
    Plus,
    /// Subtraction / unary minus operator `-`.
    Minus,
    /// Multiplication operator `*`.
    Star,
    /// Division operator `/`.
    Slash,
    /// Modulus operator `%`.
    Percent,
    /// Exponentiation operator `**`.
    Power,

    // ── String ──────────────────────────────────────────────────────────
    /// Concatenation operator `.`.
    Dot,
    /// Repetition operator `x` (produced by [`keyword_or_ident`]).
    X,

    // ── Comparison (numeric) ────────────────────────────────────────────
    /// Numeric equality `==`.
    NumEq,
    /// Numeric inequality `!=`.
    NumNe,
    /// Numeric less-than `<`.
    NumLt,
    /// Numeric greater-than `>`.
    NumGt,
    /// Numeric less-or-equal `<=`.
    NumLe,
    /// Numeric greater-or-equal `>=`.
    NumGe,
    /// Numeric three-way comparison `<=>`.
    Spaceship,

    // ── Comparison (string) ─────────────────────────────────────────────
    /// String equality `eq`.
    StrEq,
    /// String inequality `ne`.
    StrNe,
    /// String less-than `lt`.
    StrLt,
    /// String greater-than `gt`.
    StrGt,
    /// String less-or-equal `le`.
    StrLe,
    /// String greater-or-equal `ge`.
    StrGe,
    /// String three-way comparison `cmp`.
    StrCmp,

    // ── Logical ─────────────────────────────────────────────────────────
    /// Symbolic logical and `&&`.
    LogAnd,
    /// Symbolic logical or `||`.
    LogOr,
    /// Symbolic logical not `!`.
    LogNot,
    /// Word-form logical `and`.
    LogAndWord,
    /// Word-form logical `or`.
    LogOrWord,
    /// Word-form logical `not`.
    LogNotWord,
    /// Defined-or operator `//`.
    DefinedOr,

    // ── Bitwise ─────────────────────────────────────────────────────────
    /// Bitwise and `&`.
    BitAnd,
    /// Bitwise or `|`.
    BitOr,
    /// Bitwise xor `^`.
    BitXor,
    /// Bitwise complement `~`.
    BitNot,
    /// Left shift `<<`.
    ShiftLeft,
    /// Right shift `>>`.
    ShiftRight,

    // ── Assignment ──────────────────────────────────────────────────────
    /// Plain assignment `=`.
    Assign,
    /// Compound assignment `+=`.
    PlusAssign,
    /// Compound assignment `-=`.
    MinusAssign,
    /// Compound assignment `*=`.
    MulAssign,
    /// Compound assignment `/=`.
    DivAssign,
    /// Compound assignment `%=`.
    ModAssign,
    /// Compound assignment `**=`.
    PowAssign,
    /// Compound assignment `.=`.
    DotAssign,
    /// `x=` — string-repetition compound assignment (`$s x= 3`).
    XAssign,
    /// Logical-and compound assignment (presumably `&&=`, given that
    /// [`Token::BitAndAssign`] covers `&=`).
    AndAssign,
    /// Logical-or compound assignment (presumably `||=`, given that
    /// [`Token::BitOrAssign`] covers `|=`).
    OrAssign,
    /// Xor compound assignment (presumably `^=`).
    XorAssign,
    /// Compound assignment `<<=`.
    ShiftLeftAssign,
    /// Compound assignment `>>=`.
    ShiftRightAssign,
    /// Bitwise `&=`.
    BitAndAssign,
    /// Bitwise `|=`.
    BitOrAssign,
    /// Defined-or compound assignment `//=`.
    DefinedOrAssign,

    // ── Increment / decrement ───────────────────────────────────────────
    /// Increment operator `++`.
    Increment,
    /// Decrement operator `--`.
    Decrement,

    // ── Regex binding ───────────────────────────────────────────────────
    /// Regex bind operator `=~`.
    BindMatch,
    /// Negated regex bind operator `!~`.
    BindNotMatch,

    // ── Arrows & separators ─────────────────────────────────────────────
    /// Arrow operator `->`.
    Arrow,
    /// Fat arrow `=>`.
    FatArrow,
    /// `|>` — pipe-forward (F#/Elixir style): `x |> f(a)` is desugared to
    /// `f(x, a)` at parse time.
    PipeForward,
    /// `~>` — thread-first macro: in `~> EXPR stage1 stage2 ...` the threaded
    /// value is injected as the first argument of each stage.
    ThreadArrow,
    /// `~>>` / `->>` — thread-last macro: the threaded value is injected as
    /// the last argument of each stage.
    ThreadArrowLast,
    /// `~s>` — streaming thread-first.
    ///
    /// Each stage treats the threaded value as `~>` does (first arg / topic),
    /// but every stage runs in its own worker and the workers are linked by
    /// bounded channels, so items flow through one at a time. This is
    /// concurrent per-item flow with backpressure, not chunk parallelism.
    ThreadArrowStream,
    /// `~s>>` — streaming thread-last; per-stage behaviour matches `~>>`
    /// (threaded value inserted as the last argument).
    ThreadArrowStreamLast,
    /// `~p>` — parallel-chunk thread-first.
    ///
    /// The whole pipeline runs once per chunk, in parallel, and the results
    /// are merged automatically at the end (sugar for
    /// `par_reduce { stage1 |> stage2 |> ... } SOURCE`). `||>` or `|then|`
    /// switch from parallel-chunk mode back to pipe-forward / `~>`.
    ThreadArrowPar,
    /// `~p>>` — parallel-chunk thread-last counterpart of `~p>`.
    ThreadArrowParLast,
    /// `~d>` — **distributed** thread-first.
    ///
    /// Chunk-block semantics are the same as `~p>` (each stage sees `@_` =
    /// the chunk's elements), but chunks are shipped to remote cluster
    /// workers rather than local rayon threads. Syntax:
    /// `~d> on $cluster SOURCE stage1 stage2 ...`, which is sugar for
    /// `dist_reduce on $cluster { stages } SOURCE`. It reuses the existing
    /// `pmap_on` dispatcher (one ssh process per slot, JOB frames flowing
    /// over a shared work queue, fault tolerance via retry).
    ThreadArrowDist,
    /// `~d>>` — distributed thread-last counterpart of `~d>`: the threaded
    /// value becomes the last positional argument of each named stage.
    ThreadArrowDistLast,
    /// Two-dot range / inclusive flip-flop (`..`).
    Range,
    /// Three-dot range / exclusive flip-flop (`...`); as a list it expands
    /// the same way `..` does (Perl).
    RangeExclusive,
    /// Backslash `\`.
    Backslash,

    // ── Delimiters ──────────────────────────────────────────────────────
    /// Opening parenthesis `(`.
    LParen,
    /// Closing parenthesis `)`.
    RParen,
    /// Opening square bracket `[`.
    LBracket,
    /// Closing square bracket `]`.
    RBracket,
    /// Opening curly brace `{`.
    LBrace,
    /// Closing curly brace `}`.
    RBrace,
    /// `>{` — standalone block inside a thread macro (one that is not
    /// attached to a function).
    ArrowBrace,

    // ── Punctuation ─────────────────────────────────────────────────────
    /// Statement terminator `;`.
    Semicolon,
    /// List separator `,`.
    Comma,
    /// `?` punctuation.
    Question,
    /// `:` punctuation.
    Colon,

    // ── I/O ─────────────────────────────────────────────────────────────
    /// Diamond operator (presumably the bare `<>` — verify against the lexer).
    Diamond,
    /// Read-line operator carrying a name (presumably the handle in `<NAME>` —
    /// verify against the lexer).
    ReadLine(String),

    // ── File tests ──────────────────────────────────────────────────────
    /// File-test operator; the payload is presumably the test letter
    /// (e.g. the `e` in `-e`) — verify against the lexer.
    FileTest(char),

    // ── Special ─────────────────────────────────────────────────────────
    /// End of input.
    Eof,
    /// Line break.
    Newline,
}
257
258impl Token {
259    /// `is_term_start` — see implementation.
260    pub fn is_term_start(&self) -> bool {
261        matches!(
262            self,
263            Token::Integer(_)
264                | Token::Float(_)
265                | Token::SingleString(_)
266                | Token::DoubleString(_)
267                | Token::BacktickString(_)
268                | Token::ScalarVar(_)
269                | Token::DerefScalarVar(_)
270                | Token::ArrayVar(_)
271                | Token::HashVar(_)
272                | Token::Ident(_)
273                | Token::LParen
274                | Token::LBracket
275                | Token::LBrace
276                | Token::Backslash
277                | Token::Minus
278                | Token::LogNot
279                | Token::BitNot
280                | Token::LogNotWord
281                | Token::QW(_)
282                | Token::Regex(_, _, _)
283                | Token::FileTest(_)
284                | Token::ThreadArrow
285                | Token::ThreadArrowLast
286                | Token::ThreadArrowStream
287                | Token::ThreadArrowStreamLast
288                | Token::ThreadArrowPar
289                | Token::ThreadArrowParLast
290        )
291    }
292}
293
294/// Resolve an identifier to a keyword token or leave as Ident.
295pub fn keyword_or_ident(word: &str) -> Token {
296    match word {
297        "x" => Token::X,
298        "eq" => Token::StrEq,
299        "ne" => Token::StrNe,
300        "lt" => Token::StrLt,
301        "gt" => Token::StrGt,
302        "le" => Token::StrLe,
303        "ge" => Token::StrGe,
304        "cmp" => Token::StrCmp,
305        "and" => Token::LogAndWord,
306        "or" => Token::LogOrWord,
307        "not" => Token::LogNotWord,
308        _ => Token::Ident(word.to_string()),
309    }
310}
311
/// All Perl keyword identifiers that are NOT converted to separate token variants.
/// The parser recognizes these as `Token::Ident("keyword")`.
///
/// Every word is listed exactly once; entries keep the order in which they
/// were first introduced.
pub const KEYWORDS: &[&str] = &[
    "frozen",
    "typed",
    "my",
    "var",
    "val",
    "mysync",
    "our",
    "oursync",
    "local",
    "sub",
    "fn",
    "struct",
    "enum",
    "class",
    "trait",
    "extends",
    "impl",
    "pub",
    "priv",
    "Self",
    "return",
    "if",
    "elsif",
    "else",
    "unless",
    "while",
    "until",
    "for",
    "foreach",
    "do",
    "last",
    "next",
    "redo",
    "use",
    "no",
    "require",
    "package",
    "bless",
    "print",
    "say",
    "die",
    "warn",
    "chomp",
    "chop",
    "push",
    "pop",
    "shift",
    "shuffle",
    "chunked",
    "windowed",
    "unshift",
    "splice",
    "split",
    "join",
    "json_decode",
    "json_encode",
    "json_jq",
    "jwt_decode",
    "jwt_decode_unsafe",
    "jwt_encode",
    "log_debug",
    "log_error",
    "log_info",
    "log_json",
    "log_level",
    "log_trace",
    "log_warn",
    "sha256",
    "sha1",
    "md5",
    "hmac_sha256",
    "hmac",
    "uuid",
    "base64_encode",
    "base64_decode",
    "hex_encode",
    "hex_decode",
    "gzip",
    "gunzip",
    "zstd",
    "zstd_decode",
    "datetime_utc",
    "datetime_from_epoch",
    "datetime_parse_rfc3339",
    "datetime_strftime",
    "toml_decode",
    "toml_encode",
    "yaml_decode",
    "yaml_encode",
    "url_encode",
    "url_decode",
    "uri_escape",
    "uri_unescape",
    "sort",
    "reverse",
    "reversed",
    "map",
    "maps",
    "flat_map",
    "flat_maps",
    "flatten",
    "compact",
    "reject",
    "grepv",
    "concat",
    "chain",
    "set",
    "list_count",
    "list_size",
    "count",
    "size",
    "cnt",
    "inject",
    "first",
    "detect",
    "find",
    "find_all",
    "match",
    "grep",
    "greps",
    "keys",
    "values",
    "each",
    "delete",
    "exists",
    "open",
    "close",
    "read",
    "write",
    "seek",
    "tell",
    "eof",
    "defined",
    "undef",
    "ref",
    "eval",
    "exec",
    "system",
    "chdir",
    "mkdir",
    "rmdir",
    "unlink",
    "rename",
    "chmod",
    "chown",
    "length",
    "substr",
    "index",
    "rindex",
    "sprintf",
    "printf",
    "lc",
    "uc",
    "lcfirst",
    "ucfirst",
    "hex",
    "oct",
    "int",
    "abs",
    "sqrt",
    "scalar",
    "wantarray",
    "caller",
    "exit",
    "pos",
    "quotemeta",
    "chr",
    "ord",
    "pack",
    "unpack",
    "vec",
    "tie",
    "untie",
    "tied",
    "dump",
    "formline",
    "lock",
    "prototype",
    "reset",
    "BEGIN",
    "END",
    "INIT",
    "CHECK",
    "UNITCHECK",
    "AUTOLOAD",
    "DESTROY",
    "all",
    "any",
    "none",
    "take_while",
    "drop_while",
    "skip_while",
    "skip",
    "first_or",
    "tap",
    "peek",
    "with_index",
    "pmap",
    "pflat_map",
    "puniq",
    "pfirst",
    "pany",
    "pmap_chunked",
    "pipeline",
    "pgrep",
    "pfor",
    "par_lines",
    "par_walk",
    "pwatch",
    "psort",
    "reduce",
    "fold",
    "preduce",
    "preduce_init",
    "pmap_reduce",
    "pcache",
    "watch",
    "fan",
    "fan_cap",
    "pchannel",
    "pselect",
    "uniq",
    "distinct",
    "uniqstr",
    "uniqint",
    "uniqnum",
    "pairs",
    "unpairs",
    "pairkeys",
    "pairvalues",
    "pairgrep",
    "pairmap",
    "pairfirst",
    "sample",
    "zip",
    "zip_shortest",
    "mesh",
    "mesh_shortest",
    "notall",
    "reductions",
    "sum",
    "sum0",
    "product",
    "min",
    "max",
    "minstr",
    "maxstr",
    "mean",
    "median",
    "mode",
    "stddev",
    "variance",
    "async",
    "spawn",
    "trace",
    "timer",
    "bench",
    "await",
    "slurp",
    "swallow",
    "ingest",
    "burp",
    "god",
    "capture",
    "fetch_url",
    "fetch",
    "fetch_json",
    "fetch_async",
    "fetch_async_json",
    "par_fetch",
    "par_pipeline",
    "par_csv_read",
    "par_sed",
    "try",
    "catch",
    "finally",
    "given",
    "when",
    "default",
    "eval_timeout",
    "thread",
    "t",
];
606
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each `(word, token)` pair resolves to its dedicated operator token.
    fn assert_operator_words(cases: &[(&str, Token)]) {
        for (word, expected) in cases {
            assert_eq!(&keyword_or_ident(word), expected, "word: {}", word);
        }
    }

    /// Checks that each word is passed through unchanged as `Token::Ident`.
    fn assert_plain_idents(words: &[&str]) {
        for &word in words {
            assert_eq!(
                keyword_or_ident(word),
                Token::Ident(word.to_string()),
                "word: {}",
                word
            );
        }
    }

    #[test]
    fn keyword_or_ident_maps_string_ops() {
        assert_operator_words(&[("eq", Token::StrEq), ("cmp", Token::StrCmp)]);
    }

    #[test]
    fn keyword_or_ident_non_keyword_is_ident() {
        assert_plain_idents(&["foo_bar"]);
    }

    #[test]
    fn keyword_or_ident_logical_words_and_repeat() {
        assert_operator_words(&[
            ("and", Token::LogAndWord),
            ("or", Token::LogOrWord),
            ("not", Token::LogNotWord),
            ("x", Token::X),
        ]);
    }

    #[test]
    fn keyword_or_ident_string_comparison_words() {
        assert_operator_words(&[
            ("lt", Token::StrLt),
            ("gt", Token::StrGt),
            ("ge", Token::StrGe),
        ]);
    }

    #[test]
    fn keyword_or_ident_string_le_ne() {
        assert_operator_words(&[("le", Token::StrLe), ("ne", Token::StrNe)]);
    }

    #[test]
    fn keyword_or_ident_control_flow_keywords() {
        assert_plain_idents(&[
            "if", "else", "elsif", "unless", "while", "until", "for", "foreach", "return",
        ]);
    }

    #[test]
    fn keyword_or_ident_declarations() {
        assert_plain_idents(&["my", "typed", "our", "local", "sub", "package"]);
    }

    #[test]
    fn keyword_or_ident_io_and_list_ops() {
        assert_plain_idents(&[
            "print",
            "say",
            "map",
            "grep",
            "sort",
            "join",
            "split",
            "list_count",
            "list_size",
            "cnt",
            "capture",
        ]);
    }

    #[test]
    fn keyword_or_ident_parallel_primitives() {
        assert_plain_idents(&[
            "pmap",
            "pmap_chunked",
            "pipeline",
            "pgrep",
            "pfor",
            "psort",
            "reduce",
            "fold",
            "preduce",
            "fan",
            "trace",
            "timer",
        ]);
    }

    #[test]
    fn keyword_or_ident_type_and_ref() {
        assert_plain_idents(&["ref", "scalar", "defined", "undef"]);
    }

    #[test]
    fn keyword_or_ident_block_hooks() {
        assert_plain_idents(&["BEGIN", "END", "INIT"]);
    }

    #[test]
    fn keyword_or_ident_plain_identifier_untouched() {
        assert_plain_idents(&["xyzzy123"]);
    }
}
737}