miden_assembly_syntax/parser/
token.rs

1use alloc::string::String;
2use core::fmt;
3
4use miden_core::Felt;
5
6// DOCUMENTATION TYPE
7// ================================================================================================
8
/// Identifies the scope a documentation comment applies to, and carries the comment's text.
#[derive(Debug, Clone)]
pub enum DocumentationType {
    /// Text of a module-scoped documentation comment.
    Module(String),
    /// Text of a documentation comment scoped to a single form.
    Form(String),
}

/// Unwraps the comment text, discarding which scope it belonged to.
impl From<DocumentationType> for String {
    fn from(doc: DocumentationType) -> Self {
        // Both variants wrap the same payload type, so a single irrefutable pattern suffices.
        let (DocumentationType::Module(text) | DocumentationType::Form(text)) = doc;
        text
    }
}

/// Gives read-only access to the comment text regardless of scope.
impl core::ops::Deref for DocumentationType {
    type Target = String;

    fn deref(&self) -> &Self::Target {
        let (Self::Module(text) | Self::Form(text)) = self;
        text
    }
}
34
/// A group of four field elements, kept in the order given by the wrapped array.
///
/// Equality is derived from [Felt]'s own equality; ordering and hashing are implemented by hand
/// in terms of each element's `as_int()` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WordValue(pub [Felt; 4]);
37
38impl fmt::Display for WordValue {
39    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40        write!(
41            f,
42            "{:#08x}{:08x}{:08x}{:08x}",
43            &self.0[0].as_int(),
44            &self.0[1].as_int(),
45            &self.0[2].as_int(),
46            &self.0[3].as_int(),
47        )
48    }
49}
50
51impl crate::prettier::PrettyPrint for WordValue {
52    fn render(&self) -> crate::prettier::Document {
53        use crate::prettier::*;
54
55        const_text("[")
56            + self
57                .0
58                .iter()
59                .copied()
60                .map(display)
61                .reduce(|acc, doc| acc + const_text(",") + doc)
62                .unwrap_or_default()
63            + const_text("]")
64    }
65}
66
67impl PartialOrd for WordValue {
68    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
69        Some(self.cmp(other))
70    }
71}
72impl Ord for WordValue {
73    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
74        let (WordValue([l0, l1, l2, l3]), WordValue([r0, r1, r2, r3])) = (self, other);
75        l0.as_int()
76            .cmp(&r0.as_int())
77            .then_with(|| l1.as_int().cmp(&r1.as_int()))
78            .then_with(|| l2.as_int().cmp(&r2.as_int()))
79            .then_with(|| l3.as_int().cmp(&r3.as_int()))
80    }
81}
82
83impl core::hash::Hash for WordValue {
84    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
85        let WordValue([a, b, c, d]) = self;
86        [a.as_int(), b.as_int(), c.as_int(), d.as_int()].hash(state)
87    }
88}
89
90// HEX ENCODED VALUE
91// ================================================================================================
92
/// Represents one of the various types of values that have a hex-encoded representation in Miden
/// Assembly source files.
///
/// The variants are declared from narrowest to widest; the hand-written [Ord] implementation
/// sorts values of different variants in this same declaration order.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum IntValue {
    /// A tiny value, held as a `u8`
    U8(u8),
    /// A small value, held as a `u16`
    U16(u16),
    /// A u32 constant, typically represents a memory address
    U32(u32),
    /// A single field element, 8 bytes, encoded as 16 hex digits
    Felt(Felt),
    /// A set of 4 field elements, 32 bytes, encoded as a contiguous string of 64 hex digits
    Word(WordValue),
}
108impl fmt::Display for IntValue {
109    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
110        match self {
111            Self::U8(value) => write!(f, "{value}"),
112            Self::U16(value) => write!(f, "{value}"),
113            Self::U32(value) => write!(f, "{value:#04x}"),
114            Self::Felt(value) => write!(f, "{:#08x}", &value.as_int().to_be()),
115            Self::Word(value) => write!(f, "{value}"),
116        }
117    }
118}
119
120impl crate::prettier::PrettyPrint for IntValue {
121    fn render(&self) -> crate::prettier::Document {
122        match self {
123            Self::U8(v) => v.render(),
124            Self::U16(v) => v.render(),
125            Self::U32(v) => v.render(),
126            Self::Felt(v) => u64::from(*v).render(),
127            Self::Word(v) => v.render(),
128        }
129    }
130}
131
132impl PartialOrd for IntValue {
133    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
134        Some(self.cmp(other))
135    }
136}
137impl Ord for IntValue {
138    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
139        use core::cmp::Ordering;
140        match (self, other) {
141            (Self::U8(l), Self::U8(r)) => l.cmp(r),
142            (Self::U8(_), _) => Ordering::Less,
143            (Self::U16(_), Self::U8(_)) => Ordering::Greater,
144            (Self::U16(l), Self::U16(r)) => l.cmp(r),
145            (Self::U16(_), _) => Ordering::Less,
146            (Self::U32(_), Self::U8(_) | Self::U16(_)) => Ordering::Greater,
147            (Self::U32(l), Self::U32(r)) => l.cmp(r),
148            (Self::U32(_), _) => Ordering::Less,
149            (Self::Felt(_), Self::U8(_) | Self::U16(_) | Self::U32(_)) => Ordering::Greater,
150            (Self::Felt(l), Self::Felt(r)) => l.as_int().cmp(&r.as_int()),
151            (Self::Felt(_), _) => Ordering::Less,
152            (Self::Word(l), Self::Word(r)) => l.cmp(r),
153            (Self::Word(_), _) => Ordering::Greater,
154        }
155    }
156}
157
158impl core::hash::Hash for IntValue {
159    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
160        core::mem::discriminant(self).hash(state);
161        match self {
162            Self::U8(value) => value.hash(state),
163            Self::U16(value) => value.hash(state),
164            Self::U32(value) => value.hash(state),
165            Self::Felt(value) => value.as_int().hash(state),
166            Self::Word(value) => value.hash(state),
167        }
168    }
169}
170
171// BINARY ENCODED VALUE
172// ================================================================================================
173
/// Represents one of the various types of values that have a binary-encoded representation in
/// Miden Assembly source files.
///
/// Unlike [IntValue], only the three unsigned integer widths below are available.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinEncodedValue {
    /// A tiny value, held as a `u8`
    U8(u8),
    /// A small value, held as a `u16`
    U16(u16),
    /// A u32 constant, typically represents a memory address
    U32(u32),
}
185
186// TOKEN
187// ================================================================================================
188
/// The token type produced by [crate::parser::Lexer], and consumed by the parser.
///
/// The variants fall into three groups:
///
/// * keyword tokens (`Add` through `Xor`), each of which has an entry in `Token::KEYWORDS`;
/// * punctuation tokens (`At` through `Rstab`);
/// * tokens that carry a payload or mark a lexical event (`DocComment` through `Eof`).
///
/// The `'input` lifetime is that of the string slices held by the identifier and string variants.
#[derive(Debug, Clone)]
pub enum Token<'input> {
    // Keywords
    Add,
    Adv,
    AdvMap,
    InsertHdword,
    InsertHdwordWithDomain,
    InsertHperm,
    InsertMem,
    AdvLoadw,
    AdvPipe,
    AdvPush,
    AdvStack,
    PushMapval,
    PushMapvaln,
    PushMtnode,
    PushSmtpeek,
    PushSmtset,
    PushSmtget,
    PushU64Div,
    PushFalconDiv,
    And,
    Assert,
    Assertz,
    AssertEq,
    AssertEqw,
    EvalCircuit,
    Begin,
    Breakpoint,
    Caller,
    Call,
    Cdrop,
    Cdropw,
    Clk,
    Const,
    Cswap,
    Cswapw,
    Debug,
    Div,
    Drop,
    Dropw,
    Dup,
    Dupw,
    Dynexec,
    Dyncall,
    Else,
    Emit,
    End,
    Eq,
    Eqw,
    Ext2Add,
    Ext2Div,
    Ext2Inv,
    Ext2Mul,
    Ext2Neg,
    Ext2Sub,
    Err,
    Exec,
    Export,
    Exp,
    ExpU,
    False,
    FriExt2Fold4,
    Gt,
    Gte,
    Hash,
    HasMapkey,
    HornerBase,
    HornerExt,
    Hperm,
    Hmerge,
    If,
    ILog2,
    Inv,
    IsOdd,
    Local,
    Locaddr,
    LocLoad,
    LocLoadw,
    LocStore,
    LocStorew,
    Lt,
    Lte,
    Mem,
    MemLoad,
    MemLoadw,
    MemStore,
    MemStorew,
    MemStream,
    Movdn,
    Movdnw,
    Movup,
    Movupw,
    MtreeGet,
    MtreeMerge,
    MtreeSet,
    MtreeVerify,
    Mul,
    Neg,
    Neq,
    Not,
    Nop,
    Or,
    Padw,
    Pow2,
    Proc,
    Procref,
    Push,
    Repeat,
    Sdepth,
    Stack,
    Sub,
    Swap,
    Swapw,
    Swapdw,
    Syscall,
    Trace,
    True,
    Use,
    U32And,
    U32Assert,
    U32Assert2,
    U32Assertw,
    U32Cast,
    U32Div,
    U32Divmod,
    U32Gt,
    U32Gte,
    U32Lt,
    U32Lte,
    U32Max,
    U32Min,
    U32Mod,
    U32Not,
    U32Or,
    U32OverflowingAdd,
    U32OverflowingAdd3,
    U32OverflowingMadd,
    U32OverflowingMul,
    U32OverflowingSub,
    U32Popcnt,
    U32Clz,
    U32Ctz,
    U32Clo,
    U32Cto,
    U32Rotl,
    U32Rotr,
    U32Shl,
    U32Shr,
    U32Split,
    U32Test,
    U32Testw,
    U32WrappingAdd,
    U32WrappingAdd3,
    U32WrappingMadd,
    U32WrappingMul,
    U32WrappingSub,
    U32Xor,
    While,
    Xor,
    // Punctuation
    At,
    Bang,
    ColonColon,
    Dot,
    Comma,
    Equal,
    Lparen,
    Lbracket,
    Minus,
    Plus,
    SlashSlash,
    Slash,
    Star,
    Rparen,
    Rbracket,
    Rstab,
    // Payload-carrying and special tokens
    /// A documentation comment, together with the scope it applies to
    DocComment(DocumentationType),
    /// A hex-encoded literal
    HexValue(IntValue),
    /// A binary-encoded literal
    BinValue(BinEncodedValue),
    /// An integer literal
    Int(u64),
    /// An identifier
    Ident(&'input str),
    /// A constant identifier
    ConstantIdent(&'input str),
    /// A quoted identifier
    QuotedIdent(&'input str),
    /// A quoted string
    QuotedString(&'input str),
    /// A (non-documentation) comment
    Comment,
    /// End of file
    Eof,
}
377
378impl fmt::Display for Token<'_> {
379    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
380        match self {
381            Token::Add => write!(f, "add"),
382            Token::Adv => write!(f, "adv"),
383            Token::AdvMap => write!(f, "adv_map"),
384            Token::AdvStack => write!(f, "adv_stack"),
385            Token::InsertHdword => write!(f, "insert_hdword"),
386            Token::InsertHdwordWithDomain => write!(f, "insert_hdword_d"),
387            Token::InsertHperm => write!(f, "insert_hperm"),
388            Token::InsertMem => write!(f, "insert_mem"),
389            Token::AdvLoadw => write!(f, "adv_loadw"),
390            Token::AdvPipe => write!(f, "adv_pipe"),
391            Token::AdvPush => write!(f, "adv_push"),
392            Token::PushMapval => write!(f, "push_mapval"),
393            Token::PushMapvaln => write!(f, "push_mapvaln"),
394            Token::PushMtnode => write!(f, "push_mtnode"),
395            Token::PushSmtpeek => write!(f, "push_smtpeek"),
396            Token::PushSmtset => write!(f, "push_smtset"),
397            Token::PushSmtget => write!(f, "push_smtget"),
398            Token::PushU64Div => write!(f, "push_u64div"),
399            Token::PushFalconDiv => write!(f, "push_falcon_div"),
400            Token::And => write!(f, "and"),
401            Token::Assert => write!(f, "assert"),
402            Token::Assertz => write!(f, "assertz"),
403            Token::AssertEq => write!(f, "assert_eq"),
404            Token::AssertEqw => write!(f, "assert_eqw"),
405            Token::EvalCircuit => write!(f, "eval_circuit"),
406            Token::Begin => write!(f, "begin"),
407            Token::Breakpoint => write!(f, "breakpoint"),
408            Token::Caller => write!(f, "caller"),
409            Token::Call => write!(f, "call"),
410            Token::Cdrop => write!(f, "cdrop"),
411            Token::Cdropw => write!(f, "cdropw"),
412            Token::Clk => write!(f, "clk"),
413            Token::Const => write!(f, "const"),
414            Token::Cswap => write!(f, "cswap"),
415            Token::Cswapw => write!(f, "cswapw"),
416            Token::Debug => write!(f, "debug"),
417            Token::Div => write!(f, "div"),
418            Token::Drop => write!(f, "drop"),
419            Token::Dropw => write!(f, "dropw"),
420            Token::Dup => write!(f, "dup"),
421            Token::Dupw => write!(f, "dupw"),
422            Token::Dynexec => write!(f, "dynexec"),
423            Token::Dyncall => write!(f, "dyncall"),
424            Token::Else => write!(f, "else"),
425            Token::Emit => write!(f, "emit"),
426            Token::End => write!(f, "end"),
427            Token::Eq => write!(f, "eq"),
428            Token::Eqw => write!(f, "eqw"),
429            Token::Ext2Add => write!(f, "ext2add"),
430            Token::Ext2Div => write!(f, "ext2div"),
431            Token::Ext2Inv => write!(f, "ext2inv"),
432            Token::Ext2Mul => write!(f, "ext2mul"),
433            Token::Ext2Neg => write!(f, "ext2neg"),
434            Token::Ext2Sub => write!(f, "ext2sub"),
435            Token::Err => write!(f, "err"),
436            Token::Exec => write!(f, "exec"),
437            Token::Exp => write!(f, "exp"),
438            Token::ExpU => write!(f, "exp.u"),
439            Token::Export => write!(f, "export"),
440            Token::False => write!(f, "false"),
441            Token::FriExt2Fold4 => write!(f, "fri_ext2fold4"),
442            Token::Gt => write!(f, "gt"),
443            Token::Gte => write!(f, "gte"),
444            Token::Hash => write!(f, "hash"),
445            Token::HasMapkey => write!(f, "has_mapkey"),
446            Token::Hperm => write!(f, "hperm"),
447            Token::Hmerge => write!(f, "hmerge"),
448            Token::If => write!(f, "if"),
449            Token::ILog2 => write!(f, "ilog2"),
450            Token::Inv => write!(f, "inv"),
451            Token::IsOdd => write!(f, "is_odd"),
452            Token::Local => write!(f, "local"),
453            Token::Locaddr => write!(f, "locaddr"),
454            Token::LocLoad => write!(f, "loc_load"),
455            Token::LocLoadw => write!(f, "loc_loadw"),
456            Token::LocStore => write!(f, "loc_store"),
457            Token::LocStorew => write!(f, "loc_storew"),
458            Token::Lt => write!(f, "lt"),
459            Token::Lte => write!(f, "lte"),
460            Token::Mem => write!(f, "mem"),
461            Token::MemLoad => write!(f, "mem_load"),
462            Token::MemLoadw => write!(f, "mem_loadw"),
463            Token::MemStore => write!(f, "mem_store"),
464            Token::MemStorew => write!(f, "mem_storew"),
465            Token::MemStream => write!(f, "mem_stream"),
466            Token::Movdn => write!(f, "movdn"),
467            Token::Movdnw => write!(f, "movdnw"),
468            Token::Movup => write!(f, "movup"),
469            Token::Movupw => write!(f, "movupw"),
470            Token::MtreeGet => write!(f, "mtree_get"),
471            Token::MtreeMerge => write!(f, "mtree_merge"),
472            Token::MtreeSet => write!(f, "mtree_set"),
473            Token::MtreeVerify => write!(f, "mtree_verify"),
474            Token::Mul => write!(f, "mul"),
475            Token::Neg => write!(f, "neg"),
476            Token::Neq => write!(f, "neq"),
477            Token::Not => write!(f, "not"),
478            Token::Nop => write!(f, "nop"),
479            Token::Or => write!(f, "or"),
480            Token::Padw => write!(f, "padw"),
481            Token::Pow2 => write!(f, "pow2"),
482            Token::Proc => write!(f, "proc"),
483            Token::Procref => write!(f, "procref"),
484            Token::Push => write!(f, "push"),
485            Token::HornerBase => write!(f, "horner_eval_base"),
486            Token::HornerExt => write!(f, "horner_eval_ext"),
487            Token::Repeat => write!(f, "repeat"),
488            Token::Sdepth => write!(f, "sdepth"),
489            Token::Stack => write!(f, "stack"),
490            Token::Sub => write!(f, "sub"),
491            Token::Swap => write!(f, "swap"),
492            Token::Swapw => write!(f, "swapw"),
493            Token::Swapdw => write!(f, "swapdw"),
494            Token::Syscall => write!(f, "syscall"),
495            Token::Trace => write!(f, "trace"),
496            Token::True => write!(f, "true"),
497            Token::Use => write!(f, "use"),
498            Token::U32And => write!(f, "u32and"),
499            Token::U32Assert => write!(f, "u32assert"),
500            Token::U32Assert2 => write!(f, "u32assert2"),
501            Token::U32Assertw => write!(f, "u32assertw"),
502            Token::U32Cast => write!(f, "u32cast"),
503            Token::U32Div => write!(f, "u32div"),
504            Token::U32Divmod => write!(f, "u32divmod"),
505            Token::U32Gt => write!(f, "u32gt"),
506            Token::U32Gte => write!(f, "u32gte"),
507            Token::U32Lt => write!(f, "u32lt"),
508            Token::U32Lte => write!(f, "u32lte"),
509            Token::U32Max => write!(f, "u32max"),
510            Token::U32Min => write!(f, "u32min"),
511            Token::U32Mod => write!(f, "u32mod"),
512            Token::U32Not => write!(f, "u32not"),
513            Token::U32Or => write!(f, "u32or"),
514            Token::U32OverflowingAdd => write!(f, "u32overflowing_add"),
515            Token::U32OverflowingAdd3 => write!(f, "u32overflowing_add3"),
516            Token::U32OverflowingMadd => write!(f, "u32overflowing_madd"),
517            Token::U32OverflowingMul => write!(f, "u32overflowing_mul"),
518            Token::U32OverflowingSub => write!(f, "u32overflowing_sub"),
519            Token::U32Popcnt => write!(f, "u32popcnt"),
520            Token::U32Clz => write!(f, "u32clz"),
521            Token::U32Ctz => write!(f, "u32ctz"),
522            Token::U32Clo => write!(f, "u32clo"),
523            Token::U32Cto => write!(f, "u32cto"),
524            Token::U32Rotl => write!(f, "u32rotl"),
525            Token::U32Rotr => write!(f, "u32rotr"),
526            Token::U32Shl => write!(f, "u32shl"),
527            Token::U32Shr => write!(f, "u32shr"),
528            Token::U32Split => write!(f, "u32split"),
529            Token::U32Test => write!(f, "u32test"),
530            Token::U32Testw => write!(f, "u32testw"),
531            Token::U32WrappingAdd => write!(f, "u32wrapping_add"),
532            Token::U32WrappingAdd3 => write!(f, "u32wrapping_add3"),
533            Token::U32WrappingMadd => write!(f, "u32wrapping_madd"),
534            Token::U32WrappingMul => write!(f, "u32wrapping_mul"),
535            Token::U32WrappingSub => write!(f, "u32wrapping_sub"),
536            Token::U32Xor => write!(f, "u32xor"),
537            Token::While => write!(f, "while"),
538            Token::Xor => write!(f, "xor"),
539            Token::At => write!(f, "@"),
540            Token::Bang => write!(f, "!"),
541            Token::ColonColon => write!(f, "::"),
542            Token::Dot => write!(f, "."),
543            Token::Comma => write!(f, ","),
544            Token::Equal => write!(f, "="),
545            Token::Lparen => write!(f, "("),
546            Token::Lbracket => write!(f, "["),
547            Token::Minus => write!(f, "-"),
548            Token::Plus => write!(f, "+"),
549            Token::SlashSlash => write!(f, "//"),
550            Token::Slash => write!(f, "/"),
551            Token::Star => write!(f, "*"),
552            Token::Rparen => write!(f, ")"),
553            Token::Rbracket => write!(f, "]"),
554            Token::Rstab => write!(f, "->"),
555            Token::DocComment(DocumentationType::Module(_)) => f.write_str("module doc"),
556            Token::DocComment(DocumentationType::Form(_)) => f.write_str("doc comment"),
557            Token::HexValue(_) => f.write_str("hex-encoded value"),
558            Token::BinValue(_) => f.write_str("bin-encoded value"),
559            Token::Int(_) => f.write_str("integer"),
560            Token::Ident(_) => f.write_str("identifier"),
561            Token::ConstantIdent(_) => f.write_str("constant identifier"),
562            Token::QuotedIdent(_) => f.write_str("quoted identifier"),
563            Token::QuotedString(_) => f.write_str("quoted string"),
564            Token::Comment => f.write_str("comment"),
565            Token::Eof => write!(f, "end of file"),
566        }
567    }
568}
569
570impl<'input> Token<'input> {
571    /// Returns true if this token represents the name of an instruction.
572    ///
573    /// This is used to simplify diagnostic output related to expected tokens so as not to
574    /// overwhelm the user with a ton of possible expected instruction variants.
575    pub fn is_instruction(&self) -> bool {
576        matches!(
577            self,
578            Token::Add
579                | Token::Adv
580                | Token::InsertHdword
581                | Token::InsertHdwordWithDomain
582                | Token::InsertHperm
583                | Token::InsertMem
584                | Token::AdvLoadw
585                | Token::AdvPipe
586                | Token::AdvPush
587                | Token::AdvStack
588                | Token::PushMapval
589                | Token::PushMapvaln
590                | Token::PushMtnode
591                | Token::PushSmtpeek
592                | Token::PushSmtset
593                | Token::PushSmtget
594                | Token::PushU64Div
595                | Token::PushFalconDiv
596                | Token::And
597                | Token::Assert
598                | Token::Assertz
599                | Token::AssertEq
600                | Token::AssertEqw
601                | Token::EvalCircuit
602                | Token::Breakpoint
603                | Token::Caller
604                | Token::Call
605                | Token::Cdrop
606                | Token::Cdropw
607                | Token::Clk
608                | Token::Cswap
609                | Token::Cswapw
610                | Token::Debug
611                | Token::Div
612                | Token::Drop
613                | Token::Dropw
614                | Token::Dup
615                | Token::Dupw
616                | Token::Dynexec
617                | Token::Dyncall
618                | Token::Emit
619                | Token::Eq
620                | Token::Eqw
621                | Token::Ext2Add
622                | Token::Ext2Div
623                | Token::Ext2Inv
624                | Token::Ext2Mul
625                | Token::Ext2Neg
626                | Token::Ext2Sub
627                | Token::Exec
628                | Token::Exp
629                | Token::ExpU
630                | Token::FriExt2Fold4
631                | Token::Gt
632                | Token::Gte
633                | Token::Hash
634                | Token::Hperm
635                | Token::Hmerge
636                | Token::HornerBase
637                | Token::HornerExt
638                | Token::ILog2
639                | Token::Inv
640                | Token::IsOdd
641                | Token::Local
642                | Token::Locaddr
643                | Token::LocLoad
644                | Token::LocLoadw
645                | Token::LocStore
646                | Token::LocStorew
647                | Token::Lt
648                | Token::Lte
649                | Token::Mem
650                | Token::MemLoad
651                | Token::MemLoadw
652                | Token::MemStore
653                | Token::MemStorew
654                | Token::MemStream
655                | Token::Movdn
656                | Token::Movdnw
657                | Token::Movup
658                | Token::Movupw
659                | Token::MtreeGet
660                | Token::MtreeMerge
661                | Token::MtreeSet
662                | Token::MtreeVerify
663                | Token::Mul
664                | Token::Neg
665                | Token::Neq
666                | Token::Not
667                | Token::Nop
668                | Token::Or
669                | Token::Padw
670                | Token::Pow2
671                | Token::Procref
672                | Token::Push
673                | Token::Repeat
674                | Token::Sdepth
675                | Token::Stack
676                | Token::Sub
677                | Token::Swap
678                | Token::Swapw
679                | Token::Swapdw
680                | Token::Syscall
681                | Token::Trace
682                | Token::U32And
683                | Token::U32Assert
684                | Token::U32Assert2
685                | Token::U32Assertw
686                | Token::U32Cast
687                | Token::U32Div
688                | Token::U32Divmod
689                | Token::U32Gt
690                | Token::U32Gte
691                | Token::U32Lt
692                | Token::U32Lte
693                | Token::U32Max
694                | Token::U32Min
695                | Token::U32Mod
696                | Token::U32Not
697                | Token::U32Or
698                | Token::U32OverflowingAdd
699                | Token::U32OverflowingAdd3
700                | Token::U32OverflowingMadd
701                | Token::U32OverflowingMul
702                | Token::U32OverflowingSub
703                | Token::U32Popcnt
704                | Token::U32Clz
705                | Token::U32Ctz
706                | Token::U32Clo
707                | Token::U32Cto
708                | Token::U32Rotl
709                | Token::U32Rotr
710                | Token::U32Shl
711                | Token::U32Shr
712                | Token::U32Split
713                | Token::U32Test
714                | Token::U32Testw
715                | Token::U32WrappingAdd
716                | Token::U32WrappingAdd3
717                | Token::U32WrappingMadd
718                | Token::U32WrappingMul
719                | Token::U32WrappingSub
720                | Token::U32Xor
721                | Token::Xor
722        )
723    }
724
    /// Table pairing each keyword's source spelling with the [Token] it stands for.
    ///
    /// The spellings here are the same strings the `Display` implementation writes for the
    /// corresponding tokens. `keyword_searcher` feeds the spellings to the Aho-Corasick builder
    /// in the order they are listed here.
    // NOTE(review): code outside this view may map a matched pattern's index back into this
    // table, in which case entry order is significant — confirm before reordering.
    const KEYWORDS: &'static [(&'static str, Token<'static>)] = &[
        ("add", Token::Add),
        ("adv", Token::Adv),
        ("adv_map", Token::AdvMap),
        ("eval_circuit", Token::EvalCircuit),
        ("insert_hdword", Token::InsertHdword),
        ("insert_hdword_d", Token::InsertHdwordWithDomain),
        ("insert_hperm", Token::InsertHperm),
        ("insert_mem", Token::InsertMem),
        ("adv_loadw", Token::AdvLoadw),
        ("adv_pipe", Token::AdvPipe),
        ("adv_push", Token::AdvPush),
        ("adv_stack", Token::AdvStack),
        ("push_mapval", Token::PushMapval),
        ("push_mapvaln", Token::PushMapvaln),
        ("push_mtnode", Token::PushMtnode),
        ("push_smtpeek", Token::PushSmtpeek),
        ("push_smtset", Token::PushSmtset),
        ("push_smtget", Token::PushSmtget),
        ("push_u64div", Token::PushU64Div),
        ("push_falcon_div", Token::PushFalconDiv),
        ("and", Token::And),
        ("assert", Token::Assert),
        ("assertz", Token::Assertz),
        ("assert_eq", Token::AssertEq),
        ("assert_eqw", Token::AssertEqw),
        ("begin", Token::Begin),
        ("breakpoint", Token::Breakpoint),
        ("caller", Token::Caller),
        ("call", Token::Call),
        ("cdrop", Token::Cdrop),
        ("cdropw", Token::Cdropw),
        ("clk", Token::Clk),
        ("const", Token::Const),
        ("cswap", Token::Cswap),
        ("cswapw", Token::Cswapw),
        ("debug", Token::Debug),
        ("div", Token::Div),
        ("drop", Token::Drop),
        ("dropw", Token::Dropw),
        ("dup", Token::Dup),
        ("dupw", Token::Dupw),
        ("dynexec", Token::Dynexec),
        ("dyncall", Token::Dyncall),
        ("else", Token::Else),
        ("emit", Token::Emit),
        ("end", Token::End),
        ("eq", Token::Eq),
        ("eqw", Token::Eqw),
        ("ext2add", Token::Ext2Add),
        ("ext2div", Token::Ext2Div),
        ("ext2inv", Token::Ext2Inv),
        ("ext2mul", Token::Ext2Mul),
        ("ext2neg", Token::Ext2Neg),
        ("ext2sub", Token::Ext2Sub),
        ("err", Token::Err),
        ("exec", Token::Exec),
        ("exp", Token::Exp),
        ("exp.u", Token::ExpU),
        ("export", Token::Export),
        ("false", Token::False),
        ("fri_ext2fold4", Token::FriExt2Fold4),
        ("gt", Token::Gt),
        ("gte", Token::Gte),
        ("hash", Token::Hash),
        ("has_mapkey", Token::HasMapkey),
        ("hperm", Token::Hperm),
        ("hmerge", Token::Hmerge),
        ("if", Token::If),
        ("ilog2", Token::ILog2),
        ("inv", Token::Inv),
        ("is_odd", Token::IsOdd),
        ("local", Token::Local),
        ("locaddr", Token::Locaddr),
        ("loc_load", Token::LocLoad),
        ("loc_loadw", Token::LocLoadw),
        ("loc_store", Token::LocStore),
        ("loc_storew", Token::LocStorew),
        ("lt", Token::Lt),
        ("lte", Token::Lte),
        ("mem", Token::Mem),
        ("mem_load", Token::MemLoad),
        ("mem_loadw", Token::MemLoadw),
        ("mem_store", Token::MemStore),
        ("mem_storew", Token::MemStorew),
        ("mem_stream", Token::MemStream),
        ("movdn", Token::Movdn),
        ("movdnw", Token::Movdnw),
        ("movup", Token::Movup),
        ("movupw", Token::Movupw),
        ("mtree_get", Token::MtreeGet),
        ("mtree_merge", Token::MtreeMerge),
        ("mtree_set", Token::MtreeSet),
        ("mtree_verify", Token::MtreeVerify),
        ("mul", Token::Mul),
        ("neg", Token::Neg),
        ("neq", Token::Neq),
        ("not", Token::Not),
        ("nop", Token::Nop),
        ("or", Token::Or),
        ("padw", Token::Padw),
        ("pow2", Token::Pow2),
        ("proc", Token::Proc),
        ("procref", Token::Procref),
        ("push", Token::Push),
        ("horner_eval_base", Token::HornerBase),
        ("horner_eval_ext", Token::HornerExt),
        ("repeat", Token::Repeat),
        ("sdepth", Token::Sdepth),
        ("stack", Token::Stack),
        ("sub", Token::Sub),
        ("swap", Token::Swap),
        ("swapw", Token::Swapw),
        ("swapdw", Token::Swapdw),
        ("syscall", Token::Syscall),
        ("trace", Token::Trace),
        ("true", Token::True),
        ("use", Token::Use),
        ("u32and", Token::U32And),
        ("u32assert", Token::U32Assert),
        ("u32assert2", Token::U32Assert2),
        ("u32assertw", Token::U32Assertw),
        ("u32cast", Token::U32Cast),
        ("u32div", Token::U32Div),
        ("u32divmod", Token::U32Divmod),
        ("u32gt", Token::U32Gt),
        ("u32gte", Token::U32Gte),
        ("u32lt", Token::U32Lt),
        ("u32lte", Token::U32Lte),
        ("u32max", Token::U32Max),
        ("u32min", Token::U32Min),
        ("u32mod", Token::U32Mod),
        ("u32not", Token::U32Not),
        ("u32or", Token::U32Or),
        ("u32overflowing_add", Token::U32OverflowingAdd),
        ("u32overflowing_add3", Token::U32OverflowingAdd3),
        ("u32overflowing_madd", Token::U32OverflowingMadd),
        ("u32overflowing_mul", Token::U32OverflowingMul),
        ("u32overflowing_sub", Token::U32OverflowingSub),
        ("u32popcnt", Token::U32Popcnt),
        ("u32clz", Token::U32Clz),
        ("u32ctz", Token::U32Ctz),
        ("u32clo", Token::U32Clo),
        ("u32cto", Token::U32Cto),
        ("u32rotl", Token::U32Rotl),
        ("u32rotr", Token::U32Rotr),
        ("u32shl", Token::U32Shl),
        ("u32shr", Token::U32Shr),
        ("u32split", Token::U32Split),
        ("u32test", Token::U32Test),
        ("u32testw", Token::U32Testw),
        ("u32wrapping_add", Token::U32WrappingAdd),
        ("u32wrapping_add3", Token::U32WrappingAdd3),
        ("u32wrapping_madd", Token::U32WrappingMadd),
        ("u32wrapping_mul", Token::U32WrappingMul),
        ("u32wrapping_sub", Token::U32WrappingSub),
        ("u32xor", Token::U32Xor),
        ("while", Token::While),
        ("xor", Token::Xor),
    ];
885
886    /// Constructs a DFA capable of recognizing Miden Assembly keywords.
887    ///
888    /// Constructing the state machine is expensive, so it should not be done in hot code. Instead,
889    /// prefer to construct it once and reuse it many times.
890    ///
891    /// Currently we construct an instance of this searcher in the lexer, which is then used to
892    /// select a keyword token or construct an identifier token depending on whether a given string
893    /// is a known keyword.
894    pub fn keyword_searcher() -> aho_corasick::AhoCorasick {
895        use aho_corasick::AhoCorasick;
896
897        // Execute a search for any of the keywords above, matching longest first, and requiring
898        // the match to cover the entire input.
899        AhoCorasick::builder()
900            .match_kind(aho_corasick::MatchKind::LeftmostLongest)
901            .start_kind(aho_corasick::StartKind::Anchored)
902            .build(Self::KEYWORDS.iter().map(|(kw, _)| kw).copied())
903            .expect("unable to build aho-corasick searcher for token")
904    }
905
906    /// Returns an appropriate [Token] depending on whether the given string is a keyword or an
907    /// identifier.
908    ///
909    /// NOTE: This constructs and throws away an expensive-to-construct Aho-Corasick state machine.
910    /// You should not call this from any code on a hot path. Instead, construct the state machine
911    /// once using [Token::keyword_searcher], and reuse it for all searches using
912    /// [Token::from_keyword_or_ident_with_searcher].
913    ///
914    /// Currently, this function is only called along one code path, which is when we are
915    /// constructing a parser error in which we wish to determine which, if any, of the expected
916    /// tokens are instruction opcode keywords, so we can collapse them into a more user-friendly
917    /// error message. This is not on a hot path, so we don't care if it is a bit slow.
918    pub fn from_keyword_or_ident(s: &'input str) -> Self {
919        let searcher = Self::keyword_searcher();
920        Self::from_keyword_or_ident_with_searcher(s, &searcher)
921    }
922
923    /// This is the primary function you should use when you wish to get an appropriate token for
924    /// a given input string, depending on whether it is a keyword or an identifier.
925    ///
926    /// See [Token::keyword_searcher] for additional information on how this is meant to be used.
927    pub fn from_keyword_or_ident_with_searcher(
928        s: &'input str,
929        searcher: &aho_corasick::AhoCorasick,
930    ) -> Self {
931        let input = aho_corasick::Input::new(s).anchored(aho_corasick::Anchored::Yes);
932        match searcher.find(input) {
933            // No match, it's an ident
934            None => Token::Ident(s),
935            // If the match is not exact, it's an ident
936            Some(matched) if matched.len() != s.len() => Token::Ident(s),
937            // Otherwise clone the Token corresponding to the keyword that was matched
938            Some(matched) => Self::KEYWORDS[matched.pattern().as_usize()].1.clone(),
939        }
940    }
941
942    /// Parses a [Token] from a string corresponding to that token.
943    ///
944    /// This solely exists to aid in constructing more user-friendly error messages in certain
945    /// scenarios, and is otherwise not used (nor should it be). It is quite expensive to call due
946    /// to invoking [Token::keyword_searcher] under the covers. See the documentation for that
947    /// function for more details.
948    pub fn parse(s: &'input str) -> Option<Token<'input>> {
949        match Token::from_keyword_or_ident(s) {
950            Token::Ident(_) => {
951                // Nope, try again
952                match s {
953                    "@" => Some(Token::At),
954                    "!" => Some(Token::Bang),
955                    "::" => Some(Token::ColonColon),
956                    "." => Some(Token::Dot),
957                    "," => Some(Token::Comma),
958                    "=" => Some(Token::Equal),
959                    "(" => Some(Token::Lparen),
960                    "[" => Some(Token::Lbracket),
961                    "-" => Some(Token::Minus),
962                    "+" => Some(Token::Plus),
963                    "//" => Some(Token::SlashSlash),
964                    "/" => Some(Token::Slash),
965                    "*" => Some(Token::Star),
966                    ")" => Some(Token::Rparen),
967                    "]" => Some(Token::Rbracket),
968                    "->" => Some(Token::Rstab),
969                    "end of file" => Some(Token::Eof),
970                    "module doc" => {
971                        Some(Token::DocComment(DocumentationType::Module(String::new())))
972                    },
973                    "doc comment" => {
974                        Some(Token::DocComment(DocumentationType::Form(String::new())))
975                    },
976                    "comment" => Some(Token::Comment),
977                    "hex-encoded value" => Some(Token::HexValue(IntValue::U8(0))),
978                    "bin-encoded value" => Some(Token::BinValue(BinEncodedValue::U8(0))),
979                    "integer" => Some(Token::Int(0)),
980                    "identifier" => Some(Token::Ident("")),
981                    "constant identifier" => Some(Token::ConstantIdent("")),
982                    "quoted identifier" => Some(Token::QuotedIdent("")),
983                    "quoted string" => Some(Token::QuotedString("")),
984                    _ => None,
985                }
986            },
987            // We matched a keyword
988            token => Some(token),
989        }
990    }
991}