//! `stryke/token.rs`: token kinds, word-operator resolution, and the reserved-word table.

/// A single lexical token.
///
/// Variants with a payload carry data for that token; unit variants stand for a fixed
/// operator, delimiter, or marker.
///
/// `Eq` is intentionally not derived: [`Token::Float`] holds an `f64`, which is only
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // Literals
    /// Integer literal value.
    Integer(i64),
    /// Floating-point literal value.
    Float(f64),
    /// Contents of a single-quoted string.
    SingleString(String),
    /// Contents of a double-quoted string.
    DoubleString(String),
    /// `` `...` `` or `qx{...}` — interpolated like double quotes, then executed as `sh -c` (Perl `qx`).
    BacktickString(String),
    /// Regex pattern: (pattern, flags, delimiter)
    Regex(String, String, char),
    /// Here-document.
    ///
    /// NOTE(review): the meaning of the two strings and the flag is not documented in this
    /// file — presumably (tag, body, interpolate); confirm against the lexer.
    HereDoc(String, String, bool),
    /// List of words. NOTE(review): presumably produced from `qw(...)`; confirm against the lexer.
    QW(Vec<String>),

    // Variables
    ScalarVar(String),
    /// `$$foo` — symbolic scalar deref (inner name is `foo` without sigil).
    DerefScalarVar(String),
    ArrayVar(String),
    HashVar(String),
    ArrayAt,
    HashPercent,

    // Identifiers & keywords
    /// Any word that is not one of the word operators (see `keyword_or_ident`); this
    /// includes language keywords such as `if` or `my`.
    Ident(String),
    Label(String),
    PackageSep,
    /// `format NAME =` … body … `.` (body lines without the closing `.`)
    FormatDecl {
        name: String,
        lines: Vec<String>,
    },

    // Arithmetic
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    Power,

    // String
    Dot,
    /// The word operator `x`.
    X,

    // Comparison (numeric)
    NumEq,
    NumNe,
    NumLt,
    NumGt,
    NumLe,
    NumGe,
    Spaceship,

    // Comparison (string)
    /// The word operator `eq`.
    StrEq,
    /// The word operator `ne`.
    StrNe,
    /// The word operator `lt`.
    StrLt,
    /// The word operator `gt`.
    StrGt,
    /// The word operator `le`.
    StrLe,
    /// The word operator `ge`.
    StrGe,
    /// The word operator `cmp`.
    StrCmp,

    // Logical
    LogAnd,
    LogOr,
    LogNot,
    /// The word operator `and`.
    LogAndWord,
    /// The word operator `or`.
    LogOrWord,
    /// The word operator `not`.
    LogNotWord,
    DefinedOr,

    // Bitwise
    BitAnd,
    BitOr,
    BitXor,
    BitNot,
    ShiftLeft,
    ShiftRight,

    // Assignment
    Assign,
    PlusAssign,
    MinusAssign,
    MulAssign,
    DivAssign,
    ModAssign,
    PowAssign,
    DotAssign,
    AndAssign,
    OrAssign,
    XorAssign,
    ShiftLeftAssign,
    ShiftRightAssign,
    /// Bitwise `&=`
    BitAndAssign,
    /// Bitwise `|=`
    BitOrAssign,
    DefinedOrAssign,

    // Increment/Decrement
    Increment,
    Decrement,

    // Regex binding
    BindMatch,
    BindNotMatch,

    // Arrows & separators
    Arrow,
    FatArrow,
    /// `|>` — pipe-forward (F#/Elixir): `x |> f(a)` desugars to `f(x, a)` at parse time.
    PipeForward,
    /// `~>` — thread-first macro: `~> EXPR stage1 stage2 ...` injects as first arg
    ThreadArrow,
    /// `~>>` / `->>` — thread-last macro: injects as last arg
    ThreadArrowLast,
    /// Two-dot range / inclusive flip-flop (`..`).
    Range,
    /// Three-dot range / exclusive flip-flop (`...`); list expansion matches `..` (Perl).
    RangeExclusive,
    Backslash,

    // Delimiters
    LParen,
    RParen,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    /// `>{` — standalone block in thread macro (not attached to a function)
    ArrowBrace,

    // Punctuation
    Semicolon,
    Comma,
    Question,
    Colon,

    // I/O
    Diamond,
    ReadLine(String),

    // File tests
    FileTest(char),

    // Special
    Eof,
    Newline,
}

152impl Token {
153    pub fn is_term_start(&self) -> bool {
154        matches!(
155            self,
156            Token::Integer(_)
157                | Token::Float(_)
158                | Token::SingleString(_)
159                | Token::DoubleString(_)
160                | Token::BacktickString(_)
161                | Token::ScalarVar(_)
162                | Token::DerefScalarVar(_)
163                | Token::ArrayVar(_)
164                | Token::HashVar(_)
165                | Token::Ident(_)
166                | Token::LParen
167                | Token::LBracket
168                | Token::LBrace
169                | Token::Backslash
170                | Token::Minus
171                | Token::LogNot
172                | Token::BitNot
173                | Token::LogNotWord
174                | Token::QW(_)
175                | Token::Regex(_, _, _)
176                | Token::FileTest(_)
177                | Token::ThreadArrow
178                | Token::ThreadArrowLast
179        )
180    }
181}
182
183/// Resolve an identifier to a keyword token or leave as Ident.
184pub fn keyword_or_ident(word: &str) -> Token {
185    match word {
186        "x" => Token::X,
187        "eq" => Token::StrEq,
188        "ne" => Token::StrNe,
189        "lt" => Token::StrLt,
190        "gt" => Token::StrGt,
191        "le" => Token::StrLe,
192        "ge" => Token::StrGe,
193        "cmp" => Token::StrCmp,
194        "and" => Token::LogAndWord,
195        "or" => Token::LogOrWord,
196        "not" => Token::LogNotWord,
197        _ => Token::Ident(word.to_string()),
198    }
199}
200
/// All Perl keyword identifiers that are NOT converted to separate token variants.
/// The parser recognizes these as `Token::Ident("keyword")`.
///
/// The table holds plain words only, with no sigils or punctuation. Every word is listed
/// exactly once, so the slice can be iterated (for example to build a lookup set) without
/// de-duplicating first.
pub const KEYWORDS: &[&str] = &[
    "frozen",
    "typed",
    "my",
    "mysync",
    "our",
    "local",
    "sub",
    "fn",
    "struct",
    "enum",
    "class",
    "trait",
    "extends",
    "impl",
    "pub",
    "priv",
    "Self",
    "return",
    "if",
    "elsif",
    "else",
    "unless",
    "while",
    "until",
    "for",
    "foreach",
    "do",
    "last",
    "next",
    "redo",
    "use",
    "no",
    "require",
    "package",
    "bless",
    "print",
    "say",
    "die",
    "warn",
    "chomp",
    "chop",
    "push",
    "pop",
    "shift",
    "shuffle",
    "chunked",
    "windowed",
    "unshift",
    "splice",
    "split",
    "join",
    "json_decode",
    "json_encode",
    "json_jq",
    "jwt_decode",
    "jwt_decode_unsafe",
    "jwt_encode",
    "log_debug",
    "log_error",
    "log_info",
    "log_json",
    "log_level",
    "log_trace",
    "log_warn",
    "sha256",
    "sha1",
    "md5",
    "hmac_sha256",
    "hmac",
    "uuid",
    "base64_encode",
    "base64_decode",
    "hex_encode",
    "hex_decode",
    "gzip",
    "gunzip",
    "zstd",
    "zstd_decode",
    "datetime_utc",
    "datetime_from_epoch",
    "datetime_parse_rfc3339",
    "datetime_strftime",
    "toml_decode",
    "toml_encode",
    "yaml_decode",
    "yaml_encode",
    "url_encode",
    "url_decode",
    "uri_escape",
    "uri_unescape",
    "sort",
    "reverse",
    "reversed",
    "map",
    "maps",
    "flat_map",
    "flat_maps",
    "flatten",
    "compact",
    "reject",
    "concat",
    "chain",
    "set",
    "list_count",
    "list_size",
    "count",
    "size",
    "cnt",
    "inject",
    "first",
    "detect",
    "find",
    "find_all",
    "match",
    "grep",
    "greps",
    "keys",
    "values",
    "each",
    "delete",
    "exists",
    "open",
    "close",
    "read",
    "write",
    "seek",
    "tell",
    "eof",
    "defined",
    "undef",
    "ref",
    "eval",
    "exec",
    "system",
    "chdir",
    "mkdir",
    "rmdir",
    "unlink",
    "rename",
    "chmod",
    "chown",
    "length",
    "substr",
    "index",
    "rindex",
    "sprintf",
    "printf",
    "lc",
    "uc",
    "lcfirst",
    "ucfirst",
    "hex",
    "oct",
    "int",
    "abs",
    "sqrt",
    "scalar",
    "wantarray",
    "caller",
    "exit",
    "pos",
    "quotemeta",
    "chr",
    "ord",
    "pack",
    "unpack",
    "vec",
    "tie",
    "untie",
    "tied",
    "dump",
    "formline",
    "lock",
    "prototype",
    "reset",
    "BEGIN",
    "END",
    "INIT",
    "CHECK",
    "UNITCHECK",
    "AUTOLOAD",
    "DESTROY",
    "all",
    "any",
    "none",
    "take_while",
    "drop_while",
    "skip_while",
    "skip",
    "first_or",
    "tap",
    "peek",
    "with_index",
    "pmap",
    "pflat_map",
    "puniq",
    "pfirst",
    "pany",
    "pmap_chunked",
    "pipeline",
    "pgrep",
    "pfor",
    "par_lines",
    "par_walk",
    "pwatch",
    "psort",
    "reduce",
    "fold",
    "preduce",
    "preduce_init",
    "pmap_reduce",
    "pcache",
    "watch",
    "fan",
    "fan_cap",
    "pchannel",
    "pselect",
    "uniq",
    "distinct",
    "uniqstr",
    "uniqint",
    "uniqnum",
    "pairs",
    "unpairs",
    "pairkeys",
    "pairvalues",
    "pairgrep",
    "pairmap",
    "pairfirst",
    "sample",
    "zip",
    "zip_shortest",
    "mesh",
    "mesh_shortest",
    "notall",
    "reductions",
    "sum",
    "sum0",
    "product",
    "min",
    "max",
    "minstr",
    "maxstr",
    "mean",
    "median",
    "mode",
    "stddev",
    "variance",
    "async",
    "spawn",
    "trace",
    "timer",
    "bench",
    "await",
    "slurp",
    "capture",
    "fetch_url",
    "fetch",
    "fetch_json",
    "fetch_async",
    "fetch_async_json",
    "par_fetch",
    "par_pipeline",
    "par_csv_read",
    "par_sed",
    "try",
    "catch",
    "finally",
    "given",
    "when",
    "default",
    "eval_timeout",
    "thread",
    "t",
];

#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `word` comes back unchanged as a `Token::Ident`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather than this helper.
    #[track_caller]
    fn assert_ident(word: &str) {
        assert_eq!(keyword_or_ident(word), Token::Ident(word.to_string()));
    }

    /// Runs [`assert_ident`] over every word in `words`.
    #[track_caller]
    fn assert_all_ident(words: &[&str]) {
        for &word in words {
            assert_ident(word);
        }
    }

    #[test]
    fn keyword_or_ident_maps_string_ops() {
        assert_eq!(keyword_or_ident("eq"), Token::StrEq);
        assert_eq!(keyword_or_ident("cmp"), Token::StrCmp);
    }

    #[test]
    fn keyword_or_ident_non_keyword_is_ident() {
        assert_ident("foo_bar");
    }

    #[test]
    fn keyword_or_ident_logical_words_and_repeat() {
        assert_eq!(keyword_or_ident("and"), Token::LogAndWord);
        assert_eq!(keyword_or_ident("or"), Token::LogOrWord);
        assert_eq!(keyword_or_ident("not"), Token::LogNotWord);
        assert_eq!(keyword_or_ident("x"), Token::X);
    }

    #[test]
    fn keyword_or_ident_string_comparison_words() {
        assert_eq!(keyword_or_ident("lt"), Token::StrLt);
        assert_eq!(keyword_or_ident("gt"), Token::StrGt);
        assert_eq!(keyword_or_ident("ge"), Token::StrGe);
    }

    #[test]
    fn keyword_or_ident_string_le_ne() {
        assert_eq!(keyword_or_ident("le"), Token::StrLe);
        assert_eq!(keyword_or_ident("ne"), Token::StrNe);
    }

    #[test]
    fn keyword_or_ident_control_flow_keywords() {
        assert_all_ident(&[
            "if", "else", "elsif", "unless", "while", "until", "for", "foreach", "return",
        ]);
    }

    #[test]
    fn keyword_or_ident_declarations() {
        assert_all_ident(&["my", "typed", "our", "local", "sub", "package"]);
    }

    #[test]
    fn keyword_or_ident_io_and_list_ops() {
        assert_all_ident(&[
            "print",
            "say",
            "map",
            "grep",
            "sort",
            "join",
            "split",
            "list_count",
            "list_size",
            "cnt",
            "capture",
        ]);
    }

    #[test]
    fn keyword_or_ident_parallel_primitives() {
        assert_all_ident(&[
            "pmap",
            "pmap_chunked",
            "pipeline",
            "pgrep",
            "pfor",
            "psort",
            "reduce",
            "fold",
            "preduce",
            "fan",
            "trace",
            "timer",
        ]);
    }

    #[test]
    fn keyword_or_ident_type_and_ref() {
        assert_all_ident(&["ref", "scalar", "defined", "undef"]);
    }

    #[test]
    fn keyword_or_ident_block_hooks() {
        assert_all_ident(&["BEGIN", "END", "INIT"]);
    }

    #[test]
    fn keyword_or_ident_plain_identifier_untouched() {
        assert_ident("xyzzy123");
    }

    /// Matching is exact, so upper-cased spellings of the word operators stay identifiers.
    #[test]
    fn keyword_or_ident_is_case_sensitive() {
        assert_all_ident(&["X", "EQ", "Cmp", "AND", "Not"]);
    }

    /// Every entry of `KEYWORDS` must stay an `Ident`, as the table's documentation says.
    #[test]
    fn keywords_table_entries_stay_ident() {
        assert_all_ident(KEYWORDS);
    }

    /// Pins the current classification done by `Token::is_term_start`.
    #[test]
    fn is_term_start_classification() {
        assert!(Token::Integer(0).is_term_start());
        assert!(Token::Ident("foo".to_string()).is_term_start());
        assert!(Token::LParen.is_term_start());
        assert!(Token::Minus.is_term_start());
        assert!(Token::ThreadArrow.is_term_start());
        assert!(!Token::Plus.is_term_start());
        assert!(!Token::RParen.is_term_start());
        assert!(!Token::Semicolon.is_term_start());
        assert!(!Token::Eof.is_term_start());
    }
}