Skip to main content

stryke/
token.rs

/// A single lexical token.
///
/// Variants are grouped by the section comments below (literals, variables, operators,
/// delimiters, ...). Word operators such as `eq`, `and` or `x` are produced from identifier
/// text by `keyword_or_ident`; every other word stays an `Ident`.
///
/// Only `Debug`, `Clone` and `PartialEq` are derived (`Float` carries an `f64`, which has no
/// `Eq` implementation).
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // Literals
    /// Integer literal, carried as an `i64`.
    Integer(i64),
    /// Floating-point literal, carried as an `f64`.
    Float(f64),
    /// Single-quoted string contents.
    /// NOTE(review): presumably not interpolated (Perl convention) — confirm in the lexer.
    SingleString(String),
    /// Double-quoted string contents (the `BacktickString` docs describe double quotes as
    /// interpolating).
    DoubleString(String),
    /// `` `...` `` or `qx{...}` — interpolated like double quotes, then executed as `sh -c` (Perl `qx`).
    BacktickString(String),
    /// Regex pattern: (pattern, flags, delimiter)
    Regex(String, String, char),
    /// Here-document.
    /// NOTE(review): the meaning of the three fields is not documented in this file —
    /// presumably (tag, body, interpolate flag); confirm in the lexer.
    HereDoc(String, String, bool),
    /// Word list, one `String` per word.
    /// NOTE(review): presumably produced by `qw(...)` — confirm in the lexer.
    QW(Vec<String>),

    // Variables
    /// Scalar variable.
    /// NOTE(review): payload is presumably the name without the `$` sigil, matching the
    /// convention documented on `DerefScalarVar` — confirm.
    ScalarVar(String),
    /// `$$foo` — symbolic scalar deref (inner name is `foo` without sigil).
    DerefScalarVar(String),
    /// Array variable. NOTE(review): presumably the name without the `@` sigil — confirm.
    ArrayVar(String),
    /// Hash variable. NOTE(review): presumably the name without the `%` sigil — confirm.
    HashVar(String),
    /// NOTE(review): presumably a bare `@` sigil not followed by a name — confirm.
    ArrayAt,
    /// NOTE(review): presumably a bare `%` sigil not followed by a name — confirm.
    HashPercent,

    // Identifiers & keywords
    /// Identifier, or a keyword word that has no dedicated variant (see `keyword_or_ident`
    /// and `KEYWORDS`).
    Ident(String),
    /// Statement label. NOTE(review): presumably `NAME:` — confirm in the lexer.
    Label(String),
    /// Package separator. NOTE(review): presumably `::` — confirm in the lexer.
    PackageSep,
    /// `format NAME =` … body … `.` (body lines without the closing `.`)
    FormatDecl {
        name: String,
        lines: Vec<String>,
    },

    // Arithmetic
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Power,

    // String
    Dot,
    /// The `x` word operator (repetition); produced by `keyword_or_ident("x")`.
    X,

    // Comparison (numeric)
    NumEq,
    NumNe,
    NumLt,
    NumGt,
    NumLe,
    NumGe,
    Spaceship,

    // Comparison (string)
    /// `eq`
    StrEq,
    /// `ne`
    StrNe,
    /// `lt`
    StrLt,
    /// `gt`
    StrGt,
    /// `le`
    StrLe,
    /// `ge`
    StrGe,
    /// `cmp`
    StrCmp,

    // Logical
    LogAnd,
    LogOr,
    LogNot,
    /// `and` (word form)
    LogAndWord,
    /// `or` (word form)
    LogOrWord,
    /// `not` (word form)
    LogNotWord,
    DefinedOr,

    // Bitwise
    BitAnd,
    BitOr,
    BitXor,
    BitNot,
    ShiftLeft,
    ShiftRight,

    // Assignment
    Assign,
    PlusAssign,
    MinusAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    PowAssign,
    DotAssign,
    /// NOTE(review): presumably the logical `&&=`, since `BitAndAssign` is the bitwise `&=` —
    /// confirm in the lexer.
    AndAssign,
    /// NOTE(review): presumably the logical `||=`, since `BitOrAssign` is the bitwise `|=` —
    /// confirm in the lexer.
    OrAssign,
    XorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    /// Bitwise `&=`
    BitAndAssign,
    /// Bitwise `|=`
    BitOrAssign,
    DefinedOrAssign,

    // Increment/Decrement
    Increment,
    Decrement,

    // Regex binding
    BindMatch,
    BindNotMatch,

    // Arrows & separators
    Arrow,
    FatArrow,
    /// `|>` — pipe-forward (F#/Elixir): `x |> f(a)` desugars to `f(x, a)` at parse time.
    PipeForward,
    /// `~>` — thread-first macro: `~> EXPR stage1 stage2 ...` injects as first arg
    ThreadArrow,
    /// `~>>` / `->>` — thread-last macro: injects as last arg
    ThreadArrowLast,
    /// `~s>` — streaming thread-first. Per-stage semantics match `~>`
    /// (insert threaded value as first arg / topic), but each stage runs
    /// in its own worker connected by bounded channels — items flow one
    /// at a time. Concurrent (per-item flow with backpressure), not
    /// chunk-parallel.
    ThreadArrowStream,
    /// `~s>>` — streaming thread-last. Per-stage semantics match `~>>`
    /// (insert threaded value as last arg).
    ThreadArrowStreamLast,
    /// `~p>` — parallel-chunk thread-first. Whole pipeline runs per chunk
    /// in parallel, results auto-merged at end (sugar for
    /// `par_reduce { stage1 |> stage2 |> ... } SOURCE`). `||>` or
    /// `|then|` switch from parallel-chunk back to pipe-forward / `~>`.
    ThreadArrowPar,
    /// `~p>>` — parallel-chunk thread-last counterpart of `~p>`.
    ThreadArrowParLast,
    /// Two-dot range / inclusive flip-flop (`..`).
    Range,
    /// Three-dot range / exclusive flip-flop (`...`); list expansion matches `..` (Perl).
    RangeExclusive,
    Backslash,

    // Delimiters
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    /// `>{` — standalone block in thread macro (not attached to a function)
    ArrowBrace,

    // Punctuation
    Semicolon,
    Comma,
    Question,
    Colon,

    // I/O
    /// NOTE(review): presumably the bare `<>` read operator — confirm in the lexer.
    Diamond,
    /// NOTE(review): presumably `<HANDLE>`, carrying the handle name — confirm in the lexer.
    ReadLine(String),

    // File tests
    /// File-test operator, identified by a single `char`.
    /// NOTE(review): presumably the letter of `-e`, `-f`, ... — confirm in the lexer.
    FileTest(char),

    // Special
    /// End of input.
    Eof,
    Newline,
}
167
168impl Token {
169    pub fn is_term_start(&self) -> bool {
170        matches!(
171            self,
172            Token::Integer(_)
173                | Token::Float(_)
174                | Token::SingleString(_)
175                | Token::DoubleString(_)
176                | Token::BacktickString(_)
177                | Token::ScalarVar(_)
178                | Token::DerefScalarVar(_)
179                | Token::ArrayVar(_)
180                | Token::HashVar(_)
181                | Token::Ident(_)
182                | Token::LParen
183                | Token::LBracket
184                | Token::LBrace
185                | Token::Backslash
186                | Token::Minus
187                | Token::LogNot
188                | Token::BitNot
189                | Token::LogNotWord
190                | Token::QW(_)
191                | Token::Regex(_, _, _)
192                | Token::FileTest(_)
193                | Token::ThreadArrow
194                | Token::ThreadArrowLast
195                | Token::ThreadArrowStream
196                | Token::ThreadArrowStreamLast
197                | Token::ThreadArrowPar
198                | Token::ThreadArrowParLast
199        )
200    }
201}
202
203/// Resolve an identifier to a keyword token or leave as Ident.
204pub fn keyword_or_ident(word: &str) -> Token {
205    match word {
206        "x" => Token::X,
207        "eq" => Token::StrEq,
208        "ne" => Token::StrNe,
209        "lt" => Token::StrLt,
210        "gt" => Token::StrGt,
211        "le" => Token::StrLe,
212        "ge" => Token::StrGe,
213        "cmp" => Token::StrCmp,
214        "and" => Token::LogAndWord,
215        "or" => Token::LogOrWord,
216        "not" => Token::LogNotWord,
217        _ => Token::Ident(word.to_string()),
218    }
219}
220
/// All keyword identifiers that are NOT converted to separate token variants.
/// The parser recognizes these as `Token::Ident("keyword")`.
///
/// Each word appears exactly once. Earlier revisions listed `chomp`, `chop`, `defined`,
/// `each`, `exists`, `scalar`, `tie` and `json_jq` twice; the repeats were dropped, keeping
/// the first occurrence, so membership is unchanged.
pub const KEYWORDS: &[&str] = &[
    "frozen",
    "typed",
    "my",
    "mysync",
    "our",
    "oursync",
    "local",
    "sub",
    "fn",
    "struct",
    "enum",
    "class",
    "trait",
    "extends",
    "impl",
    "pub",
    "priv",
    "Self",
    "return",
    "if",
    "elsif",
    "else",
    "unless",
    "while",
    "until",
    "for",
    "foreach",
    "do",
    "last",
    "next",
    "redo",
    "use",
    "no",
    "require",
    "package",
    "bless",
    "print",
    "say",
    "die",
    "warn",
    "chomp",
    "chop",
    "push",
    "pop",
    "shift",
    "shuffle",
    "chunked",
    "windowed",
    "unshift",
    "splice",
    "split",
    "join",
    "json_decode",
    "json_encode",
    "json_jq",
    "jwt_decode",
    "jwt_decode_unsafe",
    "jwt_encode",
    "log_debug",
    "log_error",
    "log_info",
    "log_json",
    "log_level",
    "log_trace",
    "log_warn",
    "sha256",
    "sha1",
    "md5",
    "hmac_sha256",
    "hmac",
    "uuid",
    "base64_encode",
    "base64_decode",
    "hex_encode",
    "hex_decode",
    "gzip",
    "gunzip",
    "zstd",
    "zstd_decode",
    "datetime_utc",
    "datetime_from_epoch",
    "datetime_parse_rfc3339",
    "datetime_strftime",
    "toml_decode",
    "toml_encode",
    "yaml_decode",
    "yaml_encode",
    "url_encode",
    "url_decode",
    "uri_escape",
    "uri_unescape",
    "sort",
    "reverse",
    "reversed",
    "map",
    "maps",
    "flat_map",
    "flat_maps",
    "flatten",
    "compact",
    "reject",
    "grepv",
    "concat",
    "chain",
    "set",
    "list_count",
    "list_size",
    "count",
    "size",
    "cnt",
    "inject",
    "first",
    "detect",
    "find",
    "find_all",
    "match",
    "grep",
    "greps",
    "keys",
    "values",
    "each",
    "delete",
    "exists",
    "open",
    "close",
    "read",
    "write",
    "seek",
    "tell",
    "eof",
    "defined",
    "undef",
    "ref",
    "eval",
    "exec",
    "system",
    "chdir",
    "mkdir",
    "rmdir",
    "unlink",
    "rename",
    "chmod",
    "chown",
    "length",
    "substr",
    "index",
    "rindex",
    "sprintf",
    "printf",
    "lc",
    "uc",
    "lcfirst",
    "ucfirst",
    "hex",
    "oct",
    "int",
    "abs",
    "sqrt",
    "scalar",
    "wantarray",
    "caller",
    "exit",
    "pos",
    "quotemeta",
    "chr",
    "ord",
    "pack",
    "unpack",
    "vec",
    "tie",
    "untie",
    "tied",
    "dump",
    "formline",
    "lock",
    "prototype",
    "reset",
    "BEGIN",
    "END",
    "INIT",
    "CHECK",
    "UNITCHECK",
    "AUTOLOAD",
    "DESTROY",
    "all",
    "any",
    "none",
    "take_while",
    "drop_while",
    "skip_while",
    "skip",
    "first_or",
    "tap",
    "peek",
    "with_index",
    "pmap",
    "pflat_map",
    "puniq",
    "pfirst",
    "pany",
    "pmap_chunked",
    "pipeline",
    "pgrep",
    "pfor",
    "par_lines",
    "par_walk",
    "pwatch",
    "psort",
    "reduce",
    "fold",
    "preduce",
    "preduce_init",
    "pmap_reduce",
    "pcache",
    "watch",
    "fan",
    "fan_cap",
    "pchannel",
    "pselect",
    "uniq",
    "distinct",
    "uniqstr",
    "uniqint",
    "uniqnum",
    "pairs",
    "unpairs",
    "pairkeys",
    "pairvalues",
    "pairgrep",
    "pairmap",
    "pairfirst",
    "sample",
    "zip",
    "zip_shortest",
    "mesh",
    "mesh_shortest",
    "notall",
    "reductions",
    "sum",
    "sum0",
    "product",
    "min",
    "max",
    "minstr",
    "maxstr",
    "mean",
    "median",
    "mode",
    "stddev",
    "variance",
    "async",
    "spawn",
    "trace",
    "timer",
    "bench",
    "await",
    "slurp",
    "capture",
    "fetch_url",
    "fetch",
    "fetch_json",
    "fetch_async",
    "fetch_async_json",
    "par_fetch",
    "par_pipeline",
    "par_csv_read",
    "par_sed",
    "try",
    "catch",
    "finally",
    "given",
    "when",
    "default",
    "eval_timeout",
    "thread",
    "t",
];
509
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each word passes through `keyword_or_ident` untouched, i.e. comes back
    /// as `Token::Ident` carrying exactly the same text.
    fn assert_plain_idents(words: &[&str]) {
        for &word in words {
            assert_eq!(keyword_or_ident(word), Token::Ident(word.to_string()));
        }
    }

    #[test]
    fn keyword_or_ident_maps_string_ops() {
        assert_eq!(keyword_or_ident("eq"), Token::StrEq);
        assert_eq!(keyword_or_ident("cmp"), Token::StrCmp);
    }

    #[test]
    fn keyword_or_ident_non_keyword_is_ident() {
        assert_plain_idents(&["foo_bar"]);
    }

    #[test]
    fn keyword_or_ident_logical_words_and_repeat() {
        assert_eq!(keyword_or_ident("and"), Token::LogAndWord);
        assert_eq!(keyword_or_ident("or"), Token::LogOrWord);
        assert_eq!(keyword_or_ident("not"), Token::LogNotWord);
        assert_eq!(keyword_or_ident("x"), Token::X);
    }

    #[test]
    fn keyword_or_ident_string_comparison_words() {
        assert_eq!(keyword_or_ident("lt"), Token::StrLt);
        assert_eq!(keyword_or_ident("gt"), Token::StrGt);
        assert_eq!(keyword_or_ident("ge"), Token::StrGe);
    }

    #[test]
    fn keyword_or_ident_string_le_ne() {
        assert_eq!(keyword_or_ident("le"), Token::StrLe);
        assert_eq!(keyword_or_ident("ne"), Token::StrNe);
    }

    // The remaining tests pin down that ordinary keywords are NOT given their own token:
    // they must stay `Token::Ident` so the parser can recognize them by name.

    #[test]
    fn keyword_or_ident_control_flow_keywords() {
        assert_plain_idents(&[
            "if", "else", "elsif", "unless", "while", "until", "for", "foreach", "return",
        ]);
    }

    #[test]
    fn keyword_or_ident_declarations() {
        assert_plain_idents(&["my", "typed", "our", "local", "sub", "package"]);
    }

    #[test]
    fn keyword_or_ident_io_and_list_ops() {
        assert_plain_idents(&[
            "print",
            "say",
            "map",
            "grep",
            "sort",
            "join",
            "split",
            "list_count",
            "list_size",
            "cnt",
            "capture",
        ]);
    }

    #[test]
    fn keyword_or_ident_parallel_primitives() {
        assert_plain_idents(&[
            "pmap",
            "pmap_chunked",
            "pipeline",
            "pgrep",
            "pfor",
            "psort",
            "reduce",
            "fold",
            "preduce",
            "fan",
            "trace",
            "timer",
        ]);
    }

    #[test]
    fn keyword_or_ident_type_and_ref() {
        assert_plain_idents(&["ref", "scalar", "defined", "undef"]);
    }

    #[test]
    fn keyword_or_ident_block_hooks() {
        assert_plain_idents(&["BEGIN", "END", "INIT"]);
    }

    #[test]
    fn keyword_or_ident_plain_identifier_untouched() {
        assert_plain_idents(&["xyzzy123"]);
    }
}