miden_assembly_syntax/parser/
token.rs

1use alloc::string::String;
2use core::fmt;
3
4use miden_core::{
5    Felt, FieldElement, StarkField,
6    utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable},
7};
8#[cfg(feature = "serde")]
9use serde::{Deserialize, Serialize};
10
11// DOCUMENTATION TYPE
12// ================================================================================================
13
/// The text of a documentation comment, tagged with the scope it was written for.
#[derive(Clone, Debug)]
pub enum DocumentationType {
    /// Module-scoped documentation (shown as "module doc" when the enclosing token is displayed).
    Module(String),
    /// Form-scoped documentation (shown as "doc comment" when the enclosing token is displayed).
    Form(String),
}
20
21impl From<DocumentationType> for String {
22    fn from(doc: DocumentationType) -> Self {
23        match doc {
24            DocumentationType::Module(s) => s,
25            DocumentationType::Form(s) => s,
26        }
27    }
28}
29
30impl core::ops::Deref for DocumentationType {
31    type Target = String;
32    fn deref(&self) -> &Self::Target {
33        match self {
34            Self::Module(s) => s,
35            Self::Form(s) => s,
36        }
37    }
38}
39
40// PUSH VALUE
41// ================================================================================================
42
43#[derive(Debug, Clone, Copy, PartialEq, Eq)]
44pub enum PushValue {
45    Int(IntValue),
46    Word(WordValue),
47}
48
49impl From<u8> for PushValue {
50    fn from(value: u8) -> Self {
51        Self::Int(value.into())
52    }
53}
54
55impl From<u16> for PushValue {
56    fn from(value: u16) -> Self {
57        Self::Int(value.into())
58    }
59}
60
61impl From<u32> for PushValue {
62    fn from(value: u32) -> Self {
63        Self::Int(value.into())
64    }
65}
66
67impl From<Felt> for PushValue {
68    fn from(value: Felt) -> Self {
69        Self::Int(value.into())
70    }
71}
72
73impl From<IntValue> for PushValue {
74    fn from(value: IntValue) -> Self {
75        Self::Int(value)
76    }
77}
78
79impl From<WordValue> for PushValue {
80    fn from(value: WordValue) -> Self {
81        Self::Word(value)
82    }
83}
84
85impl fmt::Display for PushValue {
86    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
87        match self {
88            Self::Int(value) => fmt::Display::fmt(value, f),
89            Self::Word(value) => fmt::Display::fmt(value, f),
90        }
91    }
92}
93
94impl crate::prettier::PrettyPrint for PushValue {
95    fn render(&self) -> crate::prettier::Document {
96        match self {
97            Self::Int(value) => value.render(),
98            Self::Word(value) => value.render(),
99        }
100    }
101}
102
103// WORD VALUE
104// ================================================================================================
105
106#[derive(Debug, Clone, Copy, PartialEq, Eq)]
107#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
108#[cfg_attr(feature = "serde", serde(transparent))]
109pub struct WordValue(pub [Felt; 4]);
110
111impl fmt::Display for WordValue {
112    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113        write!(
114            f,
115            "{:#08x}{:08x}{:08x}{:08x}",
116            &self.0[0].as_int(),
117            &self.0[1].as_int(),
118            &self.0[2].as_int(),
119            &self.0[3].as_int(),
120        )
121    }
122}
123
124impl crate::prettier::PrettyPrint for WordValue {
125    fn render(&self) -> crate::prettier::Document {
126        use crate::prettier::*;
127
128        const_text("[")
129            + self
130                .0
131                .iter()
132                .copied()
133                .map(display)
134                .reduce(|acc, doc| acc + const_text(",") + doc)
135                .unwrap_or_default()
136            + const_text("]")
137    }
138}
139
140impl PartialOrd for WordValue {
141    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
142        Some(self.cmp(other))
143    }
144}
145impl Ord for WordValue {
146    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
147        let (WordValue([l0, l1, l2, l3]), WordValue([r0, r1, r2, r3])) = (self, other);
148        l0.as_int()
149            .cmp(&r0.as_int())
150            .then_with(|| l1.as_int().cmp(&r1.as_int()))
151            .then_with(|| l2.as_int().cmp(&r2.as_int()))
152            .then_with(|| l3.as_int().cmp(&r3.as_int()))
153    }
154}
155
156impl core::hash::Hash for WordValue {
157    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
158        let WordValue([a, b, c, d]) = self;
159        [a.as_int(), b.as_int(), c.as_int(), d.as_int()].hash(state)
160    }
161}
162
#[cfg(feature = "arbitrary")]
impl proptest::arbitrary::Arbitrary for WordValue {
    type Parameters = ();
    type Strategy = proptest::prelude::BoxedStrategy<Self>;

    /// Generates words whose four elements are each built with `Felt::new` from an arbitrary
    /// `u64`.
    fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
        use proptest::{array::uniform4, num, strategy::Strategy};

        let element = num::u64::ANY.prop_map(Felt::new);
        uniform4(element).prop_map(WordValue).boxed()
    }
}
174
175impl Serializable for WordValue {
176    fn write_into<W: ByteWriter>(&self, target: &mut W) {
177        self.0[0].write_into(target);
178        self.0[1].write_into(target);
179        self.0[2].write_into(target);
180        self.0[3].write_into(target);
181    }
182}
183
184impl Deserializable for WordValue {
185    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
186        let a = Felt::read_from(source)?;
187        let b = Felt::read_from(source)?;
188        let c = Felt::read_from(source)?;
189        let d = Felt::read_from(source)?;
190        Ok(Self([a, b, c, d]))
191    }
192}
193
194// INT VALUE
195// ================================================================================================
196
197/// Represents one of the various types of values that have a hex-encoded representation in Miden
198/// Assembly source files.
199#[derive(Debug, Copy, Clone, PartialEq, Eq)]
200#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
201#[cfg_attr(feature = "serde", serde(untagged))]
202pub enum IntValue {
203    /// A tiny value
204    U8(u8),
205    /// A small value
206    U16(u16),
207    /// A u32 constant, typically represents a memory address
208    U32(u32),
209    /// A single field element, 8 bytes, encoded as 16 hex digits
210    Felt(Felt),
211}
212
213impl From<u8> for IntValue {
214    fn from(value: u8) -> Self {
215        Self::U8(value)
216    }
217}
218
219impl From<u16> for IntValue {
220    fn from(value: u16) -> Self {
221        Self::U16(value)
222    }
223}
224
225impl From<u32> for IntValue {
226    fn from(value: u32) -> Self {
227        Self::U32(value)
228    }
229}
230
231impl From<Felt> for IntValue {
232    fn from(value: Felt) -> Self {
233        Self::Felt(value)
234    }
235}
236
237impl IntValue {
238    pub fn as_int(&self) -> u64 {
239        match self {
240            Self::U8(value) => *value as u64,
241            Self::U16(value) => *value as u64,
242            Self::U32(value) => *value as u64,
243            Self::Felt(value) => value.as_int(),
244        }
245    }
246}
247
248impl core::ops::Add<IntValue> for IntValue {
249    type Output = IntValue;
250
251    fn add(self, rhs: IntValue) -> Self::Output {
252        super::lexer::shrink_u64_hex(self.as_int() + rhs.as_int())
253    }
254}
255
256impl core::ops::Sub<IntValue> for IntValue {
257    type Output = IntValue;
258
259    fn sub(self, rhs: IntValue) -> Self::Output {
260        super::lexer::shrink_u64_hex(self.as_int() - rhs.as_int())
261    }
262}
263
264impl core::ops::Mul<IntValue> for IntValue {
265    type Output = IntValue;
266
267    fn mul(self, rhs: IntValue) -> Self::Output {
268        super::lexer::shrink_u64_hex(self.as_int() * rhs.as_int())
269    }
270}
271
272impl core::ops::Div<IntValue> for IntValue {
273    type Output = IntValue;
274
275    fn div(self, rhs: IntValue) -> Self::Output {
276        super::lexer::shrink_u64_hex(self.as_int() / rhs.as_int())
277    }
278}
279
280impl PartialEq<Felt> for IntValue {
281    fn eq(&self, other: &Felt) -> bool {
282        match self {
283            Self::U8(lhs) => (*lhs as u64) == other.as_int(),
284            Self::U16(lhs) => (*lhs as u64) == other.as_int(),
285            Self::U32(lhs) => (*lhs as u64) == other.as_int(),
286            Self::Felt(lhs) => lhs == other,
287        }
288    }
289}
290
291impl fmt::Display for IntValue {
292    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
293        match self {
294            Self::U8(value) => write!(f, "{value}"),
295            Self::U16(value) => write!(f, "{value}"),
296            Self::U32(value) => write!(f, "{value:#04x}"),
297            Self::Felt(value) => write!(f, "{:#08x}", &value.as_int().to_be()),
298        }
299    }
300}
301
302impl crate::prettier::PrettyPrint for IntValue {
303    fn render(&self) -> crate::prettier::Document {
304        match self {
305            Self::U8(v) => v.render(),
306            Self::U16(v) => v.render(),
307            Self::U32(v) => v.render(),
308            Self::Felt(v) => u64::from(*v).render(),
309        }
310    }
311}
312
313impl PartialOrd for IntValue {
314    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
315        Some(self.cmp(other))
316    }
317}
318
319impl Ord for IntValue {
320    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
321        use core::cmp::Ordering;
322        match (self, other) {
323            (Self::U8(l), Self::U8(r)) => l.cmp(r),
324            (Self::U8(_), _) => Ordering::Less,
325            (Self::U16(_), Self::U8(_)) => Ordering::Greater,
326            (Self::U16(l), Self::U16(r)) => l.cmp(r),
327            (Self::U16(_), _) => Ordering::Less,
328            (Self::U32(_), Self::U8(_) | Self::U16(_)) => Ordering::Greater,
329            (Self::U32(l), Self::U32(r)) => l.cmp(r),
330            (Self::U32(_), _) => Ordering::Less,
331            (Self::Felt(_), Self::U8(_) | Self::U16(_) | Self::U32(_)) => Ordering::Greater,
332            (Self::Felt(l), Self::Felt(r)) => l.as_int().cmp(&r.as_int()),
333        }
334    }
335}
336
337impl core::hash::Hash for IntValue {
338    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
339        core::mem::discriminant(self).hash(state);
340        match self {
341            Self::U8(value) => value.hash(state),
342            Self::U16(value) => value.hash(state),
343            Self::U32(value) => value.hash(state),
344            Self::Felt(value) => value.as_int().hash(state),
345        }
346    }
347}
348
349impl Serializable for IntValue {
350    fn write_into<W: ByteWriter>(&self, target: &mut W) {
351        self.as_int().write_into(target)
352    }
353}
354
355impl Deserializable for IntValue {
356    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
357        let raw = source.read_u64()?;
358        if raw >= Felt::MODULUS {
359            Err(DeserializationError::InvalidValue(
360                "int value is greater than field modulus".into(),
361            ))
362        } else {
363            Ok(super::lexer::shrink_u64_hex(raw))
364        }
365    }
366}
367
#[cfg(feature = "arbitrary")]
impl proptest::arbitrary::Arbitrary for IntValue {
    type Parameters = ();
    type Strategy = proptest::prelude::BoxedStrategy<Self>;

    /// Generates values of every variant, each restricted to the range that does not fit in the
    /// next-smaller variant: `U8` covers all bytes, `U16` starts at `u8::MAX + 1`, `U32` starts at
    /// `u16::MAX + 1`, and `Felt` starts at `u32::MAX + 1`.
    fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
        use proptest::{num, prop_oneof, strategy::Strategy};

        prop_oneof![
            num::u8::ANY.prop_map(IntValue::U8),
            ((u8::MAX as u16 + 1)..=u16::MAX).prop_map(IntValue::U16),
            ((u16::MAX as u32 + 1)..=u32::MAX).prop_map(IntValue::U32),
            // The upper bound is exclusive: the modulus itself is not a valid value (this file's
            // `Deserializable` impl rejects `raw >= Felt::MODULUS`), so generating it would put a
            // value outside the intended range into the `Felt` variant.
            // Assumes `crate::FIELD_MODULUS` equals `Felt::MODULUS` - TODO confirm.
            ((u32::MAX as u64 + 1)..crate::FIELD_MODULUS)
                .prop_map(|n| IntValue::Felt(Felt::new(n))),
        ]
        .boxed()
    }
}
386
387// BINARY ENCODED VALUE
388// ================================================================================================
389
/// An integer value carried by a binary-encoded literal token (`Token::BinValue`), stored in the
/// narrowest of three widths.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BinEncodedValue {
    /// A tiny value, stored as a `u8`
    U8(u8),
    /// A small value, stored as a `u16`
    U16(u16),
    /// A `u32` constant, typically representing a memory address
    U32(u32),
}
401
402// TOKEN
403// ================================================================================================
404
/// The token type produced by [crate::parser::Lexer], and consumed by the parser.
///
/// Most variants carry no data and stand for one fixed piece of source text; the `Display` impl
/// for this type shows the text associated with each of them. The variants near the end carry the
/// lexed value, or a slice of the input (hence the `'input` lifetime).
#[derive(Debug, Clone)]
pub enum Token<'input> {
    // Data-less word tokens. `Display` renders each as fixed text, e.g. `Add` as "add" and
    // `MemLoadwBe` as "mem_loadw_be" (`Range`, listed among them, renders as "..").
    Add,
    Addrspace,
    Adv,
    AdvMap,
    InsertHdword,
    InsertHdwordWithDomain,
    InsertHqword,
    InsertHperm,
    InsertMem,
    AdvLoadw,
    AdvPipe,
    AdvPush,
    AdvStack,
    PushMapval,
    PushMapvaln,
    PushMtnode,
    And,
    Assert,
    Assertz,
    AssertEq,
    AssertEqw,
    EvalCircuit,
    Begin,
    Breakpoint,
    Byte,
    Caller,
    Call,
    Cdrop,
    Cdropw,
    Clk,
    Const,
    Cswap,
    Cswapw,
    Debug,
    Div,
    Drop,
    Dropw,
    Dup,
    Dupw,
    Dynexec,
    Dyncall,
    Else,
    Emit,
    End,
    Enum,
    Eq,
    Eqw,
    Ext2Add,
    Ext2Div,
    Ext2Inv,
    Ext2Mul,
    Ext2Neg,
    Ext2Sub,
    Err,
    Exec,
    Export,
    Exp,
    ExpU,
    False,
    Felt,
    FriExt2Fold4,
    Gt,
    Gte,
    Hash,
    HasMapkey,
    HornerBase,
    HornerExt,
    Hperm,
    Hmerge,
    I1,
    I8,
    I16,
    I32,
    I64,
    I128,
    If,
    ILog2,
    Inv,
    IsOdd,
    Local,
    Locaddr,
    LocLoad,
    LocLoadw,
    LocStore,
    LocStorew,
    Lt,
    Lte,
    Mem,
    MemLoad,
    MemLoadw,
    MemLoadwBe,
    MemLoadwLe,
    MemStore,
    MemStorew,
    MemStorewBe,
    MemStorewLe,
    MemStream,
    Movdn,
    Movdnw,
    Movup,
    Movupw,
    MtreeGet,
    MtreeMerge,
    MtreeSet,
    MtreeVerify,
    Mul,
    Neg,
    Neq,
    Not,
    Nop,
    Or,
    Padw,
    Pow2,
    Proc,
    Procref,
    Ptr,
    Pub,
    Push,
    Repeat,
    Reversew,
    Reversedw,
    Range,
    Sdepth,
    Stack,
    Struct,
    Sub,
    Swap,
    Swapw,
    Swapdw,
    Syscall,
    Trace,
    True,
    Type,
    Use,
    U8,
    U16,
    U32,
    U32And,
    U32Assert,
    U32Assert2,
    U32Assertw,
    U32Cast,
    U32Div,
    U32Divmod,
    U32Gt,
    U32Gte,
    U32Lt,
    U32Lte,
    U32Max,
    U32Min,
    U32Mod,
    U32Not,
    U32Or,
    U32OverflowingAdd,
    U32OverflowingAdd3,
    U32OverflowingMadd,
    U32OverflowingMul,
    U32OverflowingSub,
    U32Popcnt,
    U32Clz,
    U32Ctz,
    U32Clo,
    U32Cto,
    U32Rotl,
    U32Rotr,
    U32Shl,
    U32Shr,
    U32Split,
    U32Test,
    U32Testw,
    U32WrappingAdd,
    U32WrappingAdd3,
    U32WrappingMadd,
    U32WrappingMul,
    U32WrappingSub,
    U32Xor,
    U64,
    U128,
    While,
    Word,
    Event,
    Xor,
    // Punctuation and operator tokens. `Display` renders each as its symbol, e.g. `At` as "@",
    // `ColonColon` as "::" and `Rstab` as "->".
    At,
    Bang,
    Colon,
    ColonColon,
    Dot,
    Comma,
    Equal,
    Langle,
    Lparen,
    Lbrace,
    Lbracket,
    Minus,
    Plus,
    Semicolon,
    SlashSlash,
    Slash,
    Star,
    Rangle,
    Rparen,
    Rbrace,
    Rbracket,
    Rstab,
    // Tokens that carry data. `Display` renders these as a description of the token kind (e.g.
    // "identifier") rather than the carried contents.
    DocComment(DocumentationType),
    HexValue(IntValue),
    HexWord(WordValue),
    BinValue(BinEncodedValue),
    Int(u64),
    Ident(&'input str),
    ConstantIdent(&'input str),
    QuotedIdent(&'input str),
    QuotedString(&'input str),
    // Rendered by `Display` as "comment" and "end of file" respectively.
    Comment,
    Eof,
}
624
625impl fmt::Display for Token<'_> {
626    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
627        match self {
628            Token::Add => write!(f, "add"),
629            Token::Addrspace => write!(f, "addrspace"),
630            Token::Adv => write!(f, "adv"),
631            Token::AdvMap => write!(f, "adv_map"),
632            Token::AdvStack => write!(f, "adv_stack"),
633            Token::InsertHdword => write!(f, "insert_hdword"),
634            Token::InsertHdwordWithDomain => write!(f, "insert_hdword_d"),
635            Token::InsertHqword => write!(f, "insert_hqword"),
636            Token::InsertHperm => write!(f, "insert_hperm"),
637            Token::InsertMem => write!(f, "insert_mem"),
638            Token::AdvLoadw => write!(f, "adv_loadw"),
639            Token::AdvPipe => write!(f, "adv_pipe"),
640            Token::AdvPush => write!(f, "adv_push"),
641            Token::PushMapval => write!(f, "push_mapval"),
642            Token::PushMapvaln => write!(f, "push_mapvaln"),
643            Token::PushMtnode => write!(f, "push_mtnode"),
644            Token::And => write!(f, "and"),
645            Token::Assert => write!(f, "assert"),
646            Token::Assertz => write!(f, "assertz"),
647            Token::AssertEq => write!(f, "assert_eq"),
648            Token::AssertEqw => write!(f, "assert_eqw"),
649            Token::EvalCircuit => write!(f, "eval_circuit"),
650            Token::Begin => write!(f, "begin"),
651            Token::Breakpoint => write!(f, "breakpoint"),
652            Token::Byte => write!(f, "byte"),
653            Token::Caller => write!(f, "caller"),
654            Token::Call => write!(f, "call"),
655            Token::Cdrop => write!(f, "cdrop"),
656            Token::Cdropw => write!(f, "cdropw"),
657            Token::Clk => write!(f, "clk"),
658            Token::Const => write!(f, "const"),
659            Token::Cswap => write!(f, "cswap"),
660            Token::Cswapw => write!(f, "cswapw"),
661            Token::Debug => write!(f, "debug"),
662            Token::Div => write!(f, "div"),
663            Token::Drop => write!(f, "drop"),
664            Token::Dropw => write!(f, "dropw"),
665            Token::Dup => write!(f, "dup"),
666            Token::Dupw => write!(f, "dupw"),
667            Token::Dynexec => write!(f, "dynexec"),
668            Token::Dyncall => write!(f, "dyncall"),
669            Token::Else => write!(f, "else"),
670            Token::Emit => write!(f, "emit"),
671            Token::End => write!(f, "end"),
672            Token::Enum => write!(f, "enum"),
673            Token::Eq => write!(f, "eq"),
674            Token::Eqw => write!(f, "eqw"),
675            Token::Ext2Add => write!(f, "ext2add"),
676            Token::Ext2Div => write!(f, "ext2div"),
677            Token::Ext2Inv => write!(f, "ext2inv"),
678            Token::Ext2Mul => write!(f, "ext2mul"),
679            Token::Ext2Neg => write!(f, "ext2neg"),
680            Token::Ext2Sub => write!(f, "ext2sub"),
681            Token::Err => write!(f, "err"),
682            Token::Exec => write!(f, "exec"),
683            Token::Exp => write!(f, "exp"),
684            Token::ExpU => write!(f, "exp.u"),
685            Token::Export => write!(f, "export"),
686            Token::False => write!(f, "false"),
687            Token::Felt => write!(f, "felt"),
688            Token::FriExt2Fold4 => write!(f, "fri_ext2fold4"),
689            Token::Gt => write!(f, "gt"),
690            Token::Gte => write!(f, "gte"),
691            Token::Hash => write!(f, "hash"),
692            Token::HasMapkey => write!(f, "has_mapkey"),
693            Token::Hperm => write!(f, "hperm"),
694            Token::Hmerge => write!(f, "hmerge"),
695            Token::I1 => write!(f, "i1"),
696            Token::I8 => write!(f, "i8"),
697            Token::I16 => write!(f, "i16"),
698            Token::I32 => write!(f, "i32"),
699            Token::I64 => write!(f, "i64"),
700            Token::I128 => write!(f, "i128"),
701            Token::If => write!(f, "if"),
702            Token::ILog2 => write!(f, "ilog2"),
703            Token::Inv => write!(f, "inv"),
704            Token::IsOdd => write!(f, "is_odd"),
705            Token::Local => write!(f, "local"),
706            Token::Locaddr => write!(f, "locaddr"),
707            Token::LocLoad => write!(f, "loc_load"),
708            Token::LocLoadw => write!(f, "loc_loadw"),
709            Token::LocStore => write!(f, "loc_store"),
710            Token::LocStorew => write!(f, "loc_storew"),
711            Token::Lt => write!(f, "lt"),
712            Token::Lte => write!(f, "lte"),
713            Token::Mem => write!(f, "mem"),
714            Token::MemLoad => write!(f, "mem_load"),
715            Token::MemLoadw => write!(f, "mem_loadw"),
716            Token::MemLoadwBe => write!(f, "mem_loadw_be"),
717            Token::MemLoadwLe => write!(f, "mem_loadw_le"),
718            Token::MemStore => write!(f, "mem_store"),
719            Token::MemStorew => write!(f, "mem_storew"),
720            Token::MemStorewBe => write!(f, "mem_storew_be"),
721            Token::MemStorewLe => write!(f, "mem_storew_le"),
722            Token::MemStream => write!(f, "mem_stream"),
723            Token::Movdn => write!(f, "movdn"),
724            Token::Movdnw => write!(f, "movdnw"),
725            Token::Movup => write!(f, "movup"),
726            Token::Movupw => write!(f, "movupw"),
727            Token::MtreeGet => write!(f, "mtree_get"),
728            Token::MtreeMerge => write!(f, "mtree_merge"),
729            Token::MtreeSet => write!(f, "mtree_set"),
730            Token::MtreeVerify => write!(f, "mtree_verify"),
731            Token::Mul => write!(f, "mul"),
732            Token::Neg => write!(f, "neg"),
733            Token::Neq => write!(f, "neq"),
734            Token::Not => write!(f, "not"),
735            Token::Nop => write!(f, "nop"),
736            Token::Or => write!(f, "or"),
737            Token::Padw => write!(f, "padw"),
738            Token::Pow2 => write!(f, "pow2"),
739            Token::Proc => write!(f, "proc"),
740            Token::Procref => write!(f, "procref"),
741            Token::Ptr => write!(f, "ptr"),
742            Token::Pub => write!(f, "pub"),
743            Token::Push => write!(f, "push"),
744            Token::HornerBase => write!(f, "horner_eval_base"),
745            Token::HornerExt => write!(f, "horner_eval_ext"),
746            Token::Repeat => write!(f, "repeat"),
747            Token::Reversew => write!(f, "reversew"),
748            Token::Reversedw => write!(f, "reversedw"),
749            Token::Sdepth => write!(f, "sdepth"),
750            Token::Stack => write!(f, "stack"),
751            Token::Struct => write!(f, "struct"),
752            Token::Sub => write!(f, "sub"),
753            Token::Swap => write!(f, "swap"),
754            Token::Swapw => write!(f, "swapw"),
755            Token::Swapdw => write!(f, "swapdw"),
756            Token::Syscall => write!(f, "syscall"),
757            Token::Trace => write!(f, "trace"),
758            Token::True => write!(f, "true"),
759            Token::Type => write!(f, "type"),
760            Token::Use => write!(f, "use"),
761            Token::U8 => write!(f, "u8"),
762            Token::U16 => write!(f, "u16"),
763            Token::U32 => write!(f, "u32"),
764            Token::U32And => write!(f, "u32and"),
765            Token::U32Assert => write!(f, "u32assert"),
766            Token::U32Assert2 => write!(f, "u32assert2"),
767            Token::U32Assertw => write!(f, "u32assertw"),
768            Token::U32Cast => write!(f, "u32cast"),
769            Token::U32Div => write!(f, "u32div"),
770            Token::U32Divmod => write!(f, "u32divmod"),
771            Token::U32Gt => write!(f, "u32gt"),
772            Token::U32Gte => write!(f, "u32gte"),
773            Token::U32Lt => write!(f, "u32lt"),
774            Token::U32Lte => write!(f, "u32lte"),
775            Token::U32Max => write!(f, "u32max"),
776            Token::U32Min => write!(f, "u32min"),
777            Token::U32Mod => write!(f, "u32mod"),
778            Token::U32Not => write!(f, "u32not"),
779            Token::U32Or => write!(f, "u32or"),
780            Token::U32OverflowingAdd => write!(f, "u32overflowing_add"),
781            Token::U32OverflowingAdd3 => write!(f, "u32overflowing_add3"),
782            Token::U32OverflowingMadd => write!(f, "u32overflowing_madd"),
783            Token::U32OverflowingMul => write!(f, "u32overflowing_mul"),
784            Token::U32OverflowingSub => write!(f, "u32overflowing_sub"),
785            Token::U32Popcnt => write!(f, "u32popcnt"),
786            Token::U32Clz => write!(f, "u32clz"),
787            Token::U32Ctz => write!(f, "u32ctz"),
788            Token::U32Clo => write!(f, "u32clo"),
789            Token::U32Cto => write!(f, "u32cto"),
790            Token::U32Rotl => write!(f, "u32rotl"),
791            Token::U32Rotr => write!(f, "u32rotr"),
792            Token::U32Shl => write!(f, "u32shl"),
793            Token::U32Shr => write!(f, "u32shr"),
794            Token::U32Split => write!(f, "u32split"),
795            Token::U32Test => write!(f, "u32test"),
796            Token::U32Testw => write!(f, "u32testw"),
797            Token::U32WrappingAdd => write!(f, "u32wrapping_add"),
798            Token::U32WrappingAdd3 => write!(f, "u32wrapping_add3"),
799            Token::U32WrappingMadd => write!(f, "u32wrapping_madd"),
800            Token::U32WrappingMul => write!(f, "u32wrapping_mul"),
801            Token::U32WrappingSub => write!(f, "u32wrapping_sub"),
802            Token::U32Xor => write!(f, "u32xor"),
803            Token::U64 => write!(f, "u64"),
804            Token::U128 => write!(f, "u128"),
805            Token::While => write!(f, "while"),
806            Token::Word => write!(f, "word"),
807            Token::Event => write!(f, "event"),
808            Token::Xor => write!(f, "xor"),
809            Token::At => write!(f, "@"),
810            Token::Bang => write!(f, "!"),
811            Token::Colon => write!(f, ":"),
812            Token::ColonColon => write!(f, "::"),
813            Token::Dot => write!(f, "."),
814            Token::Comma => write!(f, ","),
815            Token::Equal => write!(f, "="),
816            Token::Langle => write!(f, "<"),
817            Token::Lparen => write!(f, "("),
818            Token::Lbrace => write!(f, "{{"),
819            Token::Lbracket => write!(f, "["),
820            Token::Minus => write!(f, "-"),
821            Token::Plus => write!(f, "+"),
822            Token::Semicolon => write!(f, ";"),
823            Token::SlashSlash => write!(f, "//"),
824            Token::Slash => write!(f, "/"),
825            Token::Star => write!(f, "*"),
826            Token::Rangle => write!(f, ">"),
827            Token::Rparen => write!(f, ")"),
828            Token::Rbrace => write!(f, "}}"),
829            Token::Rbracket => write!(f, "]"),
830            Token::Rstab => write!(f, "->"),
831            Token::Range => write!(f, ".."),
832            Token::DocComment(DocumentationType::Module(_)) => f.write_str("module doc"),
833            Token::DocComment(DocumentationType::Form(_)) => f.write_str("doc comment"),
834            Token::HexValue(_) => f.write_str("hex-encoded value"),
835            Token::HexWord(_) => f.write_str("hex-encoded word"),
836            Token::BinValue(_) => f.write_str("bin-encoded value"),
837            Token::Int(_) => f.write_str("integer"),
838            Token::Ident(_) => f.write_str("identifier"),
839            Token::ConstantIdent(_) => f.write_str("constant identifier"),
840            Token::QuotedIdent(_) => f.write_str("quoted identifier"),
841            Token::QuotedString(_) => f.write_str("quoted string"),
842            Token::Comment => f.write_str("comment"),
843            Token::Eof => write!(f, "end of file"),
844        }
845    }
846}
847
848impl<'input> Token<'input> {
    /// Returns true if this token represents the name of an instruction.
    ///
    /// This is used to simplify diagnostic output related to expected tokens so as not to
    /// overwhelm the user with a ton of possible expected instruction variants.
    ///
    /// Membership is decided purely by the explicit variant list below; any variant that is not
    /// listed (punctuation, value-carrying tokens, and keywords such as `Begin`, `Const` or
    /// `While`) yields `false`.
    ///
    /// NOTE(review): `Token::HasMapkey` is not listed even though the similarly named
    /// `Token::PushMapval`/`Token::PushMapvaln` are - confirm whether the omission is intentional.
    pub fn is_instruction(&self) -> bool {
        matches!(
            self,
            Token::Add
                | Token::Adv
                | Token::InsertHdword
                | Token::InsertHdwordWithDomain
                | Token::InsertHqword
                | Token::InsertHperm
                | Token::InsertMem
                | Token::AdvLoadw
                | Token::AdvPipe
                | Token::AdvPush
                | Token::AdvStack
                | Token::PushMapval
                | Token::PushMapvaln
                | Token::PushMtnode
                | Token::And
                | Token::Assert
                | Token::Assertz
                | Token::AssertEq
                | Token::AssertEqw
                | Token::EvalCircuit
                | Token::Breakpoint
                | Token::Caller
                | Token::Call
                | Token::Cdrop
                | Token::Cdropw
                | Token::Clk
                | Token::Cswap
                | Token::Cswapw
                | Token::Debug
                | Token::Div
                | Token::Drop
                | Token::Dropw
                | Token::Dup
                | Token::Dupw
                | Token::Dynexec
                | Token::Dyncall
                | Token::Emit
                | Token::Eq
                | Token::Eqw
                | Token::Ext2Add
                | Token::Ext2Div
                | Token::Ext2Inv
                | Token::Ext2Mul
                | Token::Ext2Neg
                | Token::Ext2Sub
                | Token::Exec
                | Token::Exp
                | Token::ExpU
                | Token::FriExt2Fold4
                | Token::Gt
                | Token::Gte
                | Token::Hash
                | Token::Hperm
                | Token::Hmerge
                | Token::HornerBase
                | Token::HornerExt
                | Token::ILog2
                | Token::Inv
                | Token::IsOdd
                | Token::Local
                | Token::Locaddr
                | Token::LocLoad
                | Token::LocLoadw
                | Token::LocStore
                | Token::LocStorew
                | Token::Lt
                | Token::Lte
                | Token::Mem
                | Token::MemLoad
                | Token::MemLoadw
                | Token::MemLoadwBe
                | Token::MemLoadwLe
                | Token::MemStore
                | Token::MemStorew
                | Token::MemStorewBe
                | Token::MemStorewLe
                | Token::MemStream
                | Token::Movdn
                | Token::Movdnw
                | Token::Movup
                | Token::Movupw
                | Token::MtreeGet
                | Token::MtreeMerge
                | Token::MtreeSet
                | Token::MtreeVerify
                | Token::Mul
                | Token::Neg
                | Token::Neq
                | Token::Not
                | Token::Nop
                | Token::Or
                | Token::Padw
                | Token::Pow2
                | Token::Procref
                | Token::Push
                | Token::Repeat
                | Token::Reversew
                | Token::Reversedw
                | Token::Sdepth
                | Token::Stack
                | Token::Sub
                | Token::Swap
                | Token::Swapw
                | Token::Swapdw
                | Token::Syscall
                | Token::Trace
                | Token::U32And
                | Token::U32Assert
                | Token::U32Assert2
                | Token::U32Assertw
                | Token::U32Cast
                | Token::U32Div
                | Token::U32Divmod
                | Token::U32Gt
                | Token::U32Gte
                | Token::U32Lt
                | Token::U32Lte
                | Token::U32Max
                | Token::U32Min
                | Token::U32Mod
                | Token::U32Not
                | Token::U32Or
                | Token::U32OverflowingAdd
                | Token::U32OverflowingAdd3
                | Token::U32OverflowingMadd
                | Token::U32OverflowingMul
                | Token::U32OverflowingSub
                | Token::U32Popcnt
                | Token::U32Clz
                | Token::U32Ctz
                | Token::U32Clo
                | Token::U32Cto
                | Token::U32Rotl
                | Token::U32Rotr
                | Token::U32Shl
                | Token::U32Shr
                | Token::U32Split
                | Token::U32Test
                | Token::U32Testw
                | Token::U32WrappingAdd
                | Token::U32WrappingAdd3
                | Token::U32WrappingMadd
                | Token::U32WrappingMul
                | Token::U32WrappingSub
                | Token::U32Xor
                | Token::Xor
        )
    }
1004
1005    /// Returns true if this token represents the name of an type or a type-related keyword.
1006    ///
1007    /// This is used to simplify diagnostic output related to expected tokens so as not to
1008    /// overwhelm the user with a ton of possible expected tokens.
1009    pub fn is_type_keyword(&self) -> bool {
1010        matches!(
1011            self,
1012            Token::Addrspace
1013                | Token::Ptr
1014                | Token::I1
1015                | Token::I8
1016                | Token::I16
1017                | Token::I32
1018                | Token::I64
1019                | Token::I128
1020                | Token::U8
1021                | Token::U16
1022                | Token::U32
1023                | Token::U64
1024                | Token::U128
1025                | Token::Struct
1026        )
1027    }
1028
1029    const KEYWORDS: &'static [(&'static str, Token<'static>)] = &[
1030        ("add", Token::Add),
1031        ("addrspace", Token::Addrspace),
1032        ("adv", Token::Adv),
1033        ("adv_map", Token::AdvMap),
1034        ("eval_circuit", Token::EvalCircuit),
1035        ("insert_hdword", Token::InsertHdword),
1036        ("insert_hdword_d", Token::InsertHdwordWithDomain),
1037        ("insert_hqword", Token::InsertHqword),
1038        ("insert_hperm", Token::InsertHperm),
1039        ("insert_mem", Token::InsertMem),
1040        ("adv_loadw", Token::AdvLoadw),
1041        ("adv_pipe", Token::AdvPipe),
1042        ("adv_push", Token::AdvPush),
1043        ("adv_stack", Token::AdvStack),
1044        ("push_mapval", Token::PushMapval),
1045        ("push_mapvaln", Token::PushMapvaln),
1046        ("push_mtnode", Token::PushMtnode),
1047        ("and", Token::And),
1048        ("assert", Token::Assert),
1049        ("assertz", Token::Assertz),
1050        ("assert_eq", Token::AssertEq),
1051        ("assert_eqw", Token::AssertEqw),
1052        ("begin", Token::Begin),
1053        ("breakpoint", Token::Breakpoint),
1054        ("byte", Token::Byte),
1055        ("caller", Token::Caller),
1056        ("call", Token::Call),
1057        ("cdrop", Token::Cdrop),
1058        ("cdropw", Token::Cdropw),
1059        ("clk", Token::Clk),
1060        ("const", Token::Const),
1061        ("cswap", Token::Cswap),
1062        ("cswapw", Token::Cswapw),
1063        ("debug", Token::Debug),
1064        ("div", Token::Div),
1065        ("drop", Token::Drop),
1066        ("dropw", Token::Dropw),
1067        ("dup", Token::Dup),
1068        ("dupw", Token::Dupw),
1069        ("dynexec", Token::Dynexec),
1070        ("dyncall", Token::Dyncall),
1071        ("else", Token::Else),
1072        ("emit", Token::Emit),
1073        ("end", Token::End),
1074        ("enum", Token::Enum),
1075        ("eq", Token::Eq),
1076        ("eqw", Token::Eqw),
1077        ("ext2add", Token::Ext2Add),
1078        ("ext2div", Token::Ext2Div),
1079        ("ext2inv", Token::Ext2Inv),
1080        ("ext2mul", Token::Ext2Mul),
1081        ("ext2neg", Token::Ext2Neg),
1082        ("ext2sub", Token::Ext2Sub),
1083        ("err", Token::Err),
1084        ("exec", Token::Exec),
1085        ("exp", Token::Exp),
1086        ("exp.u", Token::ExpU),
1087        ("export", Token::Export),
1088        ("false", Token::False),
1089        ("felt", Token::Felt),
1090        ("fri_ext2fold4", Token::FriExt2Fold4),
1091        ("gt", Token::Gt),
1092        ("gte", Token::Gte),
1093        ("hash", Token::Hash),
1094        ("has_mapkey", Token::HasMapkey),
1095        ("hperm", Token::Hperm),
1096        ("hmerge", Token::Hmerge),
1097        ("i1", Token::I1),
1098        ("i8", Token::I8),
1099        ("i16", Token::I16),
1100        ("i32", Token::I32),
1101        ("i64", Token::I64),
1102        ("i128", Token::I128),
1103        ("if", Token::If),
1104        ("ilog2", Token::ILog2),
1105        ("inv", Token::Inv),
1106        ("is_odd", Token::IsOdd),
1107        ("local", Token::Local),
1108        ("locaddr", Token::Locaddr),
1109        ("loc_load", Token::LocLoad),
1110        ("loc_loadw", Token::LocLoadw),
1111        ("loc_store", Token::LocStore),
1112        ("loc_storew", Token::LocStorew),
1113        ("lt", Token::Lt),
1114        ("lte", Token::Lte),
1115        ("mem", Token::Mem),
1116        ("mem_load", Token::MemLoad),
1117        ("mem_loadw", Token::MemLoadw),
1118        ("mem_loadw_be", Token::MemLoadwBe),
1119        ("mem_loadw_le", Token::MemLoadwLe),
1120        ("mem_store", Token::MemStore),
1121        ("mem_storew", Token::MemStorew),
1122        ("mem_storew_be", Token::MemStorewBe),
1123        ("mem_storew_le", Token::MemStorewLe),
1124        ("mem_stream", Token::MemStream),
1125        ("movdn", Token::Movdn),
1126        ("movdnw", Token::Movdnw),
1127        ("movup", Token::Movup),
1128        ("movupw", Token::Movupw),
1129        ("mtree_get", Token::MtreeGet),
1130        ("mtree_merge", Token::MtreeMerge),
1131        ("mtree_set", Token::MtreeSet),
1132        ("mtree_verify", Token::MtreeVerify),
1133        ("mul", Token::Mul),
1134        ("neg", Token::Neg),
1135        ("neq", Token::Neq),
1136        ("not", Token::Not),
1137        ("nop", Token::Nop),
1138        ("or", Token::Or),
1139        ("padw", Token::Padw),
1140        ("pow2", Token::Pow2),
1141        ("proc", Token::Proc),
1142        ("procref", Token::Procref),
1143        ("ptr", Token::Ptr),
1144        ("push", Token::Push),
1145        ("pub", Token::Pub),
1146        ("horner_eval_base", Token::HornerBase),
1147        ("horner_eval_ext", Token::HornerExt),
1148        ("repeat", Token::Repeat),
1149        ("reversew", Token::Reversew),
1150        ("reversedw", Token::Reversedw),
1151        ("sdepth", Token::Sdepth),
1152        ("stack", Token::Stack),
1153        ("struct", Token::Struct),
1154        ("sub", Token::Sub),
1155        ("swap", Token::Swap),
1156        ("swapw", Token::Swapw),
1157        ("swapdw", Token::Swapdw),
1158        ("syscall", Token::Syscall),
1159        ("trace", Token::Trace),
1160        ("true", Token::True),
1161        ("type", Token::Type),
1162        ("use", Token::Use),
1163        ("u8", Token::U8),
1164        ("u16", Token::U16),
1165        ("u32", Token::U32),
1166        ("u32and", Token::U32And),
1167        ("u32assert", Token::U32Assert),
1168        ("u32assert2", Token::U32Assert2),
1169        ("u32assertw", Token::U32Assertw),
1170        ("u32cast", Token::U32Cast),
1171        ("u32div", Token::U32Div),
1172        ("u32divmod", Token::U32Divmod),
1173        ("u32gt", Token::U32Gt),
1174        ("u32gte", Token::U32Gte),
1175        ("u32lt", Token::U32Lt),
1176        ("u32lte", Token::U32Lte),
1177        ("u32max", Token::U32Max),
1178        ("u32min", Token::U32Min),
1179        ("u32mod", Token::U32Mod),
1180        ("u32not", Token::U32Not),
1181        ("u32or", Token::U32Or),
1182        ("u32overflowing_add", Token::U32OverflowingAdd),
1183        ("u32overflowing_add3", Token::U32OverflowingAdd3),
1184        ("u32overflowing_madd", Token::U32OverflowingMadd),
1185        ("u32overflowing_mul", Token::U32OverflowingMul),
1186        ("u32overflowing_sub", Token::U32OverflowingSub),
1187        ("u32popcnt", Token::U32Popcnt),
1188        ("u32clz", Token::U32Clz),
1189        ("u32ctz", Token::U32Ctz),
1190        ("u32clo", Token::U32Clo),
1191        ("u32cto", Token::U32Cto),
1192        ("u32rotl", Token::U32Rotl),
1193        ("u32rotr", Token::U32Rotr),
1194        ("u32shl", Token::U32Shl),
1195        ("u32shr", Token::U32Shr),
1196        ("u32split", Token::U32Split),
1197        ("u32test", Token::U32Test),
1198        ("u32testw", Token::U32Testw),
1199        ("u32wrapping_add", Token::U32WrappingAdd),
1200        ("u32wrapping_add3", Token::U32WrappingAdd3),
1201        ("u32wrapping_madd", Token::U32WrappingMadd),
1202        ("u32wrapping_mul", Token::U32WrappingMul),
1203        ("u32wrapping_sub", Token::U32WrappingSub),
1204        ("u32xor", Token::U32Xor),
1205        ("u64", Token::U64),
1206        ("u128", Token::U128),
1207        ("while", Token::While),
1208        ("word", Token::Word),
1209        ("event", Token::Event),
1210        ("xor", Token::Xor),
1211    ];
1212
1213    /// Constructs a DFA capable of recognizing Miden Assembly keywords.
1214    ///
1215    /// Constructing the state machine is expensive, so it should not be done in hot code. Instead,
1216    /// prefer to construct it once and reuse it many times.
1217    ///
1218    /// Currently we construct an instance of this searcher in the lexer, which is then used to
1219    /// select a keyword token or construct an identifier token depending on whether a given string
1220    /// is a known keyword.
1221    pub fn keyword_searcher() -> aho_corasick::AhoCorasick {
1222        use aho_corasick::AhoCorasick;
1223
1224        // Execute a search for any of the keywords above, matching longest first, and requiring
1225        // the match to cover the entire input.
1226        AhoCorasick::builder()
1227            .match_kind(aho_corasick::MatchKind::LeftmostLongest)
1228            .start_kind(aho_corasick::StartKind::Anchored)
1229            .build(Self::KEYWORDS.iter().map(|(kw, _)| kw).copied())
1230            .expect("unable to build aho-corasick searcher for token")
1231    }
1232
1233    /// Returns an appropriate [Token] depending on whether the given string is a keyword or an
1234    /// identifier.
1235    ///
1236    /// NOTE: This constructs and throws away an expensive-to-construct Aho-Corasick state machine.
1237    /// You should not call this from any code on a hot path. Instead, construct the state machine
1238    /// once using [Token::keyword_searcher], and reuse it for all searches using
1239    /// [Token::from_keyword_or_ident_with_searcher].
1240    ///
1241    /// Currently, this function is only called along one code path, which is when we are
1242    /// constructing a parser error in which we wish to determine which, if any, of the expected
1243    /// tokens are instruction opcode keywords, so we can collapse them into a more user-friendly
1244    /// error message. This is not on a hot path, so we don't care if it is a bit slow.
1245    pub fn from_keyword_or_ident(s: &'input str) -> Self {
1246        let searcher = Self::keyword_searcher();
1247        Self::from_keyword_or_ident_with_searcher(s, &searcher)
1248    }
1249
1250    /// This is the primary function you should use when you wish to get an appropriate token for
1251    /// a given input string, depending on whether it is a keyword or an identifier.
1252    ///
1253    /// See [Token::keyword_searcher] for additional information on how this is meant to be used.
1254    pub fn from_keyword_or_ident_with_searcher(
1255        s: &'input str,
1256        searcher: &aho_corasick::AhoCorasick,
1257    ) -> Self {
1258        let input = aho_corasick::Input::new(s).anchored(aho_corasick::Anchored::Yes);
1259        match searcher.find(input) {
1260            // No match, it's an ident
1261            None => Token::Ident(s),
1262            // If the match is not exact, it's an ident
1263            Some(matched) if matched.len() != s.len() => Token::Ident(s),
1264            // Otherwise clone the Token corresponding to the keyword that was matched
1265            Some(matched) => Self::KEYWORDS[matched.pattern().as_usize()].1.clone(),
1266        }
1267    }
1268
1269    /// Parses a [Token] from a string corresponding to that token.
1270    ///
1271    /// This solely exists to aid in constructing more user-friendly error messages in certain
1272    /// scenarios, and is otherwise not used (nor should it be). It is quite expensive to call due
1273    /// to invoking [Token::keyword_searcher] under the covers. See the documentation for that
1274    /// function for more details.
1275    pub fn parse(s: &'input str) -> Option<Token<'input>> {
1276        match Token::from_keyword_or_ident(s) {
1277            Token::Ident(_) => {
1278                // Nope, try again
1279                match s {
1280                    "@" => Some(Token::At),
1281                    "!" => Some(Token::Bang),
1282                    ":" => Some(Token::Colon),
1283                    "::" => Some(Token::ColonColon),
1284                    "." => Some(Token::Dot),
1285                    "," => Some(Token::Comma),
1286                    "=" => Some(Token::Equal),
1287                    "<" => Some(Token::Langle),
1288                    "(" => Some(Token::Lparen),
1289                    "{" => Some(Token::Lbrace),
1290                    "[" => Some(Token::Lbracket),
1291                    "-" => Some(Token::Minus),
1292                    "+" => Some(Token::Plus),
1293                    ";" => Some(Token::Semicolon),
1294                    "//" => Some(Token::SlashSlash),
1295                    "/" => Some(Token::Slash),
1296                    "*" => Some(Token::Star),
1297                    ">" => Some(Token::Rangle),
1298                    ")" => Some(Token::Rparen),
1299                    "}" => Some(Token::Rbrace),
1300                    "]" => Some(Token::Rbracket),
1301                    "->" => Some(Token::Rstab),
1302                    ".." => Some(Token::Range),
1303                    "end of file" => Some(Token::Eof),
1304                    "module doc" => {
1305                        Some(Token::DocComment(DocumentationType::Module(String::new())))
1306                    },
1307                    "doc comment" => {
1308                        Some(Token::DocComment(DocumentationType::Form(String::new())))
1309                    },
1310                    "comment" => Some(Token::Comment),
1311                    "hex-encoded value" => Some(Token::HexValue(IntValue::U8(0))),
1312                    "hex-encoded word" => Some(Token::HexWord(WordValue([Felt::ZERO; 4]))),
1313                    "bin-encoded value" => Some(Token::BinValue(BinEncodedValue::U8(0))),
1314                    "integer" => Some(Token::Int(0)),
1315                    "identifier" => Some(Token::Ident("")),
1316                    "constant identifier" => Some(Token::ConstantIdent("")),
1317                    "quoted identifier" => Some(Token::QuotedIdent("")),
1318                    "quoted string" => Some(Token::QuotedString("")),
1319                    _ => None,
1320                }
1321            },
1322            // We matched a keyword
1323            token => Some(token),
1324        }
1325    }
1326}