// Source: miden_assembly_syntax/parser/token.rs

1use alloc::string::String;
2use core::fmt;
3
4use miden_core::{
5    Felt,
6    field::PrimeField64,
7    serde::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable},
8};
9#[cfg(feature = "serde")]
10use serde::{Deserialize, Serialize};
11
12// DOCUMENTATION TYPE
13// ================================================================================================
14
/// A documentation comment, tagged with the scope it applies to.
///
/// Both variants own the comment text; only the scope tag differs.
#[derive(Debug, Clone)]
pub enum DocumentationType {
    /// Documentation for a module (shown as "module doc" when a token carrying it is displayed).
    Module(String),
    /// Documentation for a form (shown as "doc comment" when a token carrying it is displayed).
    Form(String),
}
21
22impl From<DocumentationType> for String {
23    fn from(doc: DocumentationType) -> Self {
24        match doc {
25            DocumentationType::Module(s) => s,
26            DocumentationType::Form(s) => s,
27        }
28    }
29}
30
31impl core::ops::Deref for DocumentationType {
32    type Target = String;
33    fn deref(&self) -> &Self::Target {
34        match self {
35            Self::Module(s) => s,
36            Self::Form(s) => s,
37        }
38    }
39}
40
41// PUSH VALUE
42// ================================================================================================
43
44#[derive(Debug, Clone, Copy, PartialEq, Eq)]
45pub enum PushValue {
46    Int(IntValue),
47    Word(WordValue),
48}
49
50impl From<u8> for PushValue {
51    fn from(value: u8) -> Self {
52        Self::Int(value.into())
53    }
54}
55
56impl From<u16> for PushValue {
57    fn from(value: u16) -> Self {
58        Self::Int(value.into())
59    }
60}
61
62impl From<u32> for PushValue {
63    fn from(value: u32) -> Self {
64        Self::Int(value.into())
65    }
66}
67
68impl From<Felt> for PushValue {
69    fn from(value: Felt) -> Self {
70        Self::Int(value.into())
71    }
72}
73
74impl From<IntValue> for PushValue {
75    fn from(value: IntValue) -> Self {
76        Self::Int(value)
77    }
78}
79
80impl From<WordValue> for PushValue {
81    fn from(value: WordValue) -> Self {
82        Self::Word(value)
83    }
84}
85
86impl fmt::Display for PushValue {
87    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88        match self {
89            Self::Int(value) => fmt::Display::fmt(value, f),
90            Self::Word(value) => fmt::Display::fmt(value, f),
91        }
92    }
93}
94
95impl crate::prettier::PrettyPrint for PushValue {
96    fn render(&self) -> crate::prettier::Document {
97        match self {
98            Self::Int(value) => value.render(),
99            Self::Word(value) => value.render(),
100        }
101    }
102}
103
104// WORD VALUE
105// ================================================================================================
106
107#[derive(Debug, Clone, Copy, PartialEq, Eq)]
108#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
109#[cfg_attr(feature = "serde", serde(transparent))]
110#[cfg_attr(
111    all(feature = "arbitrary", test),
112    miden_test_serde_macros::serde_test(binary_serde(true))
113)]
114pub struct WordValue(pub [Felt; 4]);
115
116impl fmt::Display for WordValue {
117    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
118        let mut builder = f.debug_list();
119        for value in self.0 {
120            builder.entry(&value.as_canonical_u64());
121        }
122        builder.finish()
123    }
124}
125
126impl crate::prettier::PrettyPrint for WordValue {
127    fn render(&self) -> crate::prettier::Document {
128        use crate::prettier::*;
129
130        const_text("[")
131            + self
132                .0
133                .iter()
134                .copied()
135                .map(display)
136                .reduce(|acc, doc| acc + const_text(",") + doc)
137                .unwrap_or_default()
138            + const_text("]")
139    }
140}
141
142impl PartialOrd for WordValue {
143    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
144        Some(self.cmp(other))
145    }
146}
147impl Ord for WordValue {
148    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
149        let (WordValue([l0, l1, l2, l3]), WordValue([r0, r1, r2, r3])) = (self, other);
150        l0.as_canonical_u64()
151            .cmp(&r0.as_canonical_u64())
152            .then_with(|| l1.as_canonical_u64().cmp(&r1.as_canonical_u64()))
153            .then_with(|| l2.as_canonical_u64().cmp(&r2.as_canonical_u64()))
154            .then_with(|| l3.as_canonical_u64().cmp(&r3.as_canonical_u64()))
155    }
156}
157
158impl core::hash::Hash for WordValue {
159    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
160        let WordValue([a, b, c, d]) = self;
161        [
162            a.as_canonical_u64(),
163            b.as_canonical_u64(),
164            c.as_canonical_u64(),
165            d.as_canonical_u64(),
166        ]
167        .hash(state)
168    }
169}
170
/// Property-testing support: generates words whose elements are each drawn from
/// `0..crate::FIELD_MODULUS`.
#[cfg(feature = "arbitrary")]
impl proptest::arbitrary::Arbitrary for WordValue {
    type Parameters = ();
    type Strategy = proptest::prelude::BoxedStrategy<Self>;

    fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
        use proptest::{array::uniform4, strategy::Strategy};

        let element = (0..crate::FIELD_MODULUS).prop_map(Felt::new);
        let word = uniform4(element).prop_map(WordValue);
        // Pure random values, no meaningful shrinking pattern
        word.no_shrink().boxed()
    }
}
185
186impl Serializable for WordValue {
187    fn write_into<W: ByteWriter>(&self, target: &mut W) {
188        self.0[0].write_into(target);
189        self.0[1].write_into(target);
190        self.0[2].write_into(target);
191        self.0[3].write_into(target);
192    }
193}
194
195impl Deserializable for WordValue {
196    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
197        let a = Felt::read_from(source)?;
198        let b = Felt::read_from(source)?;
199        let c = Felt::read_from(source)?;
200        let d = Felt::read_from(source)?;
201        Ok(Self([a, b, c, d]))
202    }
203}
204
205// INT VALUE
206// ================================================================================================
207
208/// Represents one of the various types of values that have a hex-encoded representation in Miden
209/// Assembly source files.
210#[derive(Debug, Copy, Clone, PartialEq, Eq)]
211#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
212#[cfg_attr(feature = "serde", serde(untagged))]
213#[cfg_attr(
214    all(feature = "arbitrary", test),
215    miden_test_serde_macros::serde_test(binary_serde(true))
216)]
217pub enum IntValue {
218    /// A tiny value
219    U8(u8),
220    /// A small value
221    U16(u16),
222    /// A u32 constant, typically represents a memory address
223    U32(u32),
224    /// A single field element, 8 bytes, encoded as 16 hex digits
225    Felt(Felt),
226}
227
228impl From<u8> for IntValue {
229    fn from(value: u8) -> Self {
230        Self::U8(value)
231    }
232}
233
234impl From<u16> for IntValue {
235    fn from(value: u16) -> Self {
236        Self::U16(value)
237    }
238}
239
240impl From<u32> for IntValue {
241    fn from(value: u32) -> Self {
242        Self::U32(value)
243    }
244}
245
246impl From<Felt> for IntValue {
247    fn from(value: Felt) -> Self {
248        Self::Felt(value)
249    }
250}
251
252impl IntValue {
253    pub fn as_int(&self) -> u64 {
254        match self {
255            Self::U8(value) => *value as u64,
256            Self::U16(value) => *value as u64,
257            Self::U32(value) => *value as u64,
258            Self::Felt(value) => value.as_canonical_u64(),
259        }
260    }
261
262    /// Returns the value as a `u64`.
263    ///
264    /// This is an alias for [`as_int`](Self::as_int) that matches the `Felt` API,
265    /// allowing the generated grammar code to use a consistent method name.
266    pub fn as_canonical_u64(&self) -> u64 {
267        self.as_int()
268    }
269
270    pub fn checked_add(&self, rhs: Self) -> Option<Self> {
271        let value = self.as_int().checked_add(rhs.as_int())?;
272        if value >= crate::FIELD_MODULUS {
273            return None;
274        }
275        Some(super::lexer::shrink_u64_hex(value))
276    }
277
278    pub fn checked_sub(&self, rhs: Self) -> Option<Self> {
279        let value = self.as_int().checked_sub(rhs.as_int())?;
280        if value >= crate::FIELD_MODULUS {
281            return None;
282        }
283        Some(super::lexer::shrink_u64_hex(value))
284    }
285
286    pub fn checked_mul(&self, rhs: Self) -> Option<Self> {
287        let value = self.as_int().checked_mul(rhs.as_int())?;
288        if value >= crate::FIELD_MODULUS {
289            return None;
290        }
291        Some(super::lexer::shrink_u64_hex(value))
292    }
293
294    pub fn checked_div(&self, rhs: Self) -> Option<Self> {
295        let value = self.as_int().checked_div(rhs.as_int())?;
296        if value >= crate::FIELD_MODULUS {
297            return None;
298        }
299        Some(super::lexer::shrink_u64_hex(value))
300    }
301}
302
303impl core::ops::Add<IntValue> for IntValue {
304    type Output = IntValue;
305
306    fn add(self, rhs: IntValue) -> Self::Output {
307        super::lexer::shrink_u64_hex(self.as_int() + rhs.as_int())
308    }
309}
310
311impl core::ops::Sub<IntValue> for IntValue {
312    type Output = IntValue;
313
314    fn sub(self, rhs: IntValue) -> Self::Output {
315        super::lexer::shrink_u64_hex(self.as_int() - rhs.as_int())
316    }
317}
318
319impl core::ops::Mul<IntValue> for IntValue {
320    type Output = IntValue;
321
322    fn mul(self, rhs: IntValue) -> Self::Output {
323        super::lexer::shrink_u64_hex(self.as_int() * rhs.as_int())
324    }
325}
326
327impl core::ops::Div<IntValue> for IntValue {
328    type Output = IntValue;
329
330    fn div(self, rhs: IntValue) -> Self::Output {
331        super::lexer::shrink_u64_hex(self.as_int() / rhs.as_int())
332    }
333}
334
335impl PartialEq<Felt> for IntValue {
336    fn eq(&self, other: &Felt) -> bool {
337        match self {
338            Self::U8(lhs) => (*lhs as u64) == other.as_canonical_u64(),
339            Self::U16(lhs) => (*lhs as u64) == other.as_canonical_u64(),
340            Self::U32(lhs) => (*lhs as u64) == other.as_canonical_u64(),
341            Self::Felt(lhs) => lhs == other,
342        }
343    }
344}
345
346impl fmt::Display for IntValue {
347    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
348        match self {
349            Self::U8(value) => write!(f, "{value}"),
350            Self::U16(value) => write!(f, "{value}"),
351            Self::U32(value) => write!(f, "{value:#04x}"),
352            Self::Felt(value) => write!(f, "{:#08x}", &value.as_canonical_u64().to_be()),
353        }
354    }
355}
356
357impl crate::prettier::PrettyPrint for IntValue {
358    fn render(&self) -> crate::prettier::Document {
359        match self {
360            Self::U8(v) => v.render(),
361            Self::U16(v) => v.render(),
362            Self::U32(v) => v.render(),
363            Self::Felt(v) => v.as_canonical_u64().render(),
364        }
365    }
366}
367
368impl PartialOrd for IntValue {
369    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
370        Some(self.cmp(other))
371    }
372}
373
374impl Ord for IntValue {
375    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
376        use core::cmp::Ordering;
377        match (self, other) {
378            (Self::U8(l), Self::U8(r)) => l.cmp(r),
379            (Self::U8(_), _) => Ordering::Less,
380            (Self::U16(_), Self::U8(_)) => Ordering::Greater,
381            (Self::U16(l), Self::U16(r)) => l.cmp(r),
382            (Self::U16(_), _) => Ordering::Less,
383            (Self::U32(_), Self::U8(_) | Self::U16(_)) => Ordering::Greater,
384            (Self::U32(l), Self::U32(r)) => l.cmp(r),
385            (Self::U32(_), _) => Ordering::Less,
386            (Self::Felt(_), Self::U8(_) | Self::U16(_) | Self::U32(_)) => Ordering::Greater,
387            (Self::Felt(l), Self::Felt(r)) => l.as_canonical_u64().cmp(&r.as_canonical_u64()),
388        }
389    }
390}
391
392impl core::hash::Hash for IntValue {
393    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
394        core::mem::discriminant(self).hash(state);
395        match self {
396            Self::U8(value) => value.hash(state),
397            Self::U16(value) => value.hash(state),
398            Self::U32(value) => value.hash(state),
399            Self::Felt(value) => value.as_canonical_u64().hash(state),
400        }
401    }
402}
403
404impl Serializable for IntValue {
405    fn write_into<W: ByteWriter>(&self, target: &mut W) {
406        self.as_int().write_into(target)
407    }
408}
409
410impl Deserializable for IntValue {
411    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
412        let raw = source.read_u64()?;
413        if raw >= Felt::ORDER_U64 {
414            Err(DeserializationError::InvalidValue(
415                "int value is greater than field modulus".into(),
416            ))
417        } else {
418            Ok(super::lexer::shrink_u64_hex(raw))
419        }
420    }
421}
422
/// Property-testing support: each variant is generated only from the range of values that does
/// not fit in a narrower variant, so a value keeps its variant across serialization.
#[cfg(feature = "arbitrary")]
impl proptest::arbitrary::Arbitrary for IntValue {
    type Parameters = ();
    type Strategy = proptest::prelude::BoxedStrategy<Self>;

    fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
        use proptest::{num, prop_oneof, strategy::Strategy};

        // U8 values - full range
        let tiny = num::u8::ANY.prop_map(IntValue::U8);
        // U16 values that don't overlap with U8
        let small = (u16::from(u8::MAX) + 1..=u16::MAX).prop_map(IntValue::U16);
        // U32 values that don't overlap with U8/U16
        let medium = (u32::from(u16::MAX) + 1..=u32::MAX).prop_map(IntValue::U32);
        // Felt values - too large for u32 but still below the field modulus
        let elements = num::u64::ANY.prop_filter_map("valid felt value", |n| {
            let in_range = n > u64::from(u32::MAX) && n < crate::FIELD_MODULUS;
            in_range.then(|| IntValue::Felt(Felt::new(n)))
        });

        // Pure random values, no meaningful shrinking pattern
        prop_oneof![tiny, small, medium, elements].no_shrink().boxed()
    }
}
452
453// BINARY ENCODED VALUE
454// ================================================================================================
455
/// Represents one of the various types of values that have a binary-encoded representation in
/// Miden Assembly source files.
///
/// This is the payload of the `BinValue` token, which is displayed as "bin-encoded value".
/// Unlike [`IntValue`], there is no variant for a full field element.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinEncodedValue {
    /// A tiny value, stored as a `u8`
    U8(u8),
    /// A small value, stored as a `u16`
    U16(u16),
    /// A u32 constant, typically represents a memory address
    U32(u32),
}
467
468// TOKEN
469// ================================================================================================
470
471/// The token type produced by [crate::parser::Lexer], and consumed by the parser.
472#[derive(Debug, Clone)]
473pub enum Token<'input> {
474    Add,
475    Addrspace,
476    Adv,
477    AdvMap,
478    InsertHdword,
479    InsertHdwordWithDomain,
480    InsertHqword,
481    InsertHperm,
482    InsertMem,
483    AdvLoadw,
484    AdvPipe,
485    AdvPush,
486    AdvStack,
487    PushMapval,
488    PushMapvalCount,
489    PushMapvaln,
490    PushMtnode,
491    And,
492    Assert,
493    Assertz,
494    AssertEq,
495    AssertEqw,
496    EvalCircuit,
497    Begin,
498    Byte,
499    Caller,
500    Call,
501    Cdrop,
502    Cdropw,
503    Clk,
504    Const,
505    CryptoStream,
506    Cswap,
507    Cswapw,
508    Debug,
509    Div,
510    Drop,
511    Dropw,
512    Dup,
513    Dupw,
514    Dynexec,
515    Dyncall,
516    Else,
517    Emit,
518    End,
519    Enum,
520    Eq,
521    Eqw,
522    Ext2Add,
523    Ext2Div,
524    Ext2Inv,
525    Ext2Mul,
526    Ext2Neg,
527    Ext2Sub,
528    Err,
529    Exec,
530    Export,
531    Exp,
532    ExpU,
533    False,
534    Felt,
535    FriExt2Fold4,
536    Gt,
537    Gte,
538    Hash,
539    HasMapkey,
540    HornerBase,
541    HornerExt,
542    LogPrecompile,
543    Hperm,
544    Hmerge,
545    I1,
546    I8,
547    I16,
548    I32,
549    I64,
550    I128,
551    If,
552    ILog2,
553    Inv,
554    IsOdd,
555    Local,
556    Locaddr,
557    LocLoad,
558    LocLoadw,
559    LocLoadwBe,
560    LocLoadwLe,
561    LocStore,
562    LocStorew,
563    LocStorewBe,
564    LocStorewLe,
565    Lt,
566    Lte,
567    Mem,
568    MemLoad,
569    MemLoadw,
570    MemLoadwBe,
571    MemLoadwLe,
572    MemStore,
573    MemStorew,
574    MemStorewBe,
575    MemStorewLe,
576    MemStream,
577    Movdn,
578    Movdnw,
579    Movup,
580    Movupw,
581    MtreeGet,
582    MtreeMerge,
583    MtreeSet,
584    MtreeVerify,
585    Mul,
586    Neg,
587    Neq,
588    Not,
589    Nop,
590    Or,
591    Padw,
592    Pow2,
593    Proc,
594    Procref,
595    Ptr,
596    Pub,
597    Push,
598    Repeat,
599    Reversew,
600    Reversedw,
601    Range,
602    Sdepth,
603    Stack,
604    Struct,
605    Sub,
606    Swap,
607    Swapw,
608    Swapdw,
609    Syscall,
610    Trace,
611    True,
612    Type,
613    Use,
614    U8,
615    U16,
616    U32,
617    U32And,
618    U32Assert,
619    U32Assert2,
620    U32Assertw,
621    U32Cast,
622    U32Div,
623    U32Divmod,
624    U32Gt,
625    U32Gte,
626    U32Lt,
627    U32Lte,
628    U32Max,
629    U32Min,
630    U32Mod,
631    U32Not,
632    U32Or,
633    U32OverflowingAdd,
634    U32OverflowingAdd3,
635    U32WideningAdd,
636    U32WideningAdd3,
637    U32WideningMadd,
638    U32WideningMul,
639    U32OverflowingSub,
640    U32Popcnt,
641    U32Clz,
642    U32Ctz,
643    U32Clo,
644    U32Cto,
645    U32Rotl,
646    U32Rotr,
647    U32Shl,
648    U32Shr,
649    U32Split,
650    U32Test,
651    U32Testw,
652    U32WrappingAdd,
653    U32WrappingAdd3,
654    U32WrappingMadd,
655    U32WrappingMul,
656    U32WrappingSub,
657    U32Xor,
658    U64,
659    U128,
660    While,
661    Word,
662    Event,
663    Xor,
664    At,
665    Bang,
666    Colon,
667    ColonColon,
668    Dot,
669    Comma,
670    Equal,
671    Langle,
672    Lparen,
673    Lbrace,
674    Lbracket,
675    Minus,
676    Plus,
677    Semicolon,
678    SlashSlash,
679    Slash,
680    Star,
681    Rangle,
682    Rparen,
683    Rbrace,
684    Rbracket,
685    Rstab,
686    DocComment(DocumentationType),
687    HexValue(IntValue),
688    HexWord(WordValue),
689    BinValue(BinEncodedValue),
690    Int(u64),
691    Ident(&'input str),
692    ConstantIdent(&'input str),
693    QuotedIdent(&'input str),
694    QuotedString(&'input str),
695    Comment,
696    Eof,
697}
698
699impl fmt::Display for Token<'_> {
700    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
701        match self {
702            Token::Add => write!(f, "add"),
703            Token::Addrspace => write!(f, "addrspace"),
704            Token::Adv => write!(f, "adv"),
705            Token::AdvMap => write!(f, "adv_map"),
706            Token::AdvStack => write!(f, "adv_stack"),
707            Token::InsertHdword => write!(f, "insert_hdword"),
708            Token::InsertHdwordWithDomain => write!(f, "insert_hdword_d"),
709            Token::InsertHqword => write!(f, "insert_hqword"),
710            Token::InsertHperm => write!(f, "insert_hperm"),
711            Token::InsertMem => write!(f, "insert_mem"),
712            Token::AdvLoadw => write!(f, "adv_loadw"),
713            Token::AdvPipe => write!(f, "adv_pipe"),
714            Token::AdvPush => write!(f, "adv_push"),
715            Token::PushMapval => write!(f, "push_mapval"),
716            Token::PushMapvalCount => write!(f, "push_mapval_count"),
717            Token::PushMapvaln => write!(f, "push_mapvaln"),
718            Token::PushMtnode => write!(f, "push_mtnode"),
719            Token::And => write!(f, "and"),
720            Token::Assert => write!(f, "assert"),
721            Token::Assertz => write!(f, "assertz"),
722            Token::AssertEq => write!(f, "assert_eq"),
723            Token::AssertEqw => write!(f, "assert_eqw"),
724            Token::EvalCircuit => write!(f, "eval_circuit"),
725            Token::Begin => write!(f, "begin"),
726            Token::Byte => write!(f, "byte"),
727            Token::Caller => write!(f, "caller"),
728            Token::Call => write!(f, "call"),
729            Token::Cdrop => write!(f, "cdrop"),
730            Token::Cdropw => write!(f, "cdropw"),
731            Token::Clk => write!(f, "clk"),
732            Token::Const => write!(f, "const"),
733            Token::CryptoStream => write!(f, "crypto_stream"),
734            Token::Cswap => write!(f, "cswap"),
735            Token::Cswapw => write!(f, "cswapw"),
736            Token::Debug => write!(f, "debug"),
737            Token::Div => write!(f, "div"),
738            Token::Drop => write!(f, "drop"),
739            Token::Dropw => write!(f, "dropw"),
740            Token::Dup => write!(f, "dup"),
741            Token::Dupw => write!(f, "dupw"),
742            Token::Dynexec => write!(f, "dynexec"),
743            Token::Dyncall => write!(f, "dyncall"),
744            Token::Else => write!(f, "else"),
745            Token::Emit => write!(f, "emit"),
746            Token::End => write!(f, "end"),
747            Token::Enum => write!(f, "enum"),
748            Token::Eq => write!(f, "eq"),
749            Token::Eqw => write!(f, "eqw"),
750            Token::Ext2Add => write!(f, "ext2add"),
751            Token::Ext2Div => write!(f, "ext2div"),
752            Token::Ext2Inv => write!(f, "ext2inv"),
753            Token::Ext2Mul => write!(f, "ext2mul"),
754            Token::Ext2Neg => write!(f, "ext2neg"),
755            Token::Ext2Sub => write!(f, "ext2sub"),
756            Token::Err => write!(f, "err"),
757            Token::Exec => write!(f, "exec"),
758            Token::Exp => write!(f, "exp"),
759            Token::ExpU => write!(f, "exp.u"),
760            Token::Export => write!(f, "export"),
761            Token::False => write!(f, "false"),
762            Token::Felt => write!(f, "felt"),
763            Token::FriExt2Fold4 => write!(f, "fri_ext2fold4"),
764            Token::Gt => write!(f, "gt"),
765            Token::Gte => write!(f, "gte"),
766            Token::Hash => write!(f, "hash"),
767            Token::HasMapkey => write!(f, "has_mapkey"),
768            Token::Hperm => write!(f, "hperm"),
769            Token::Hmerge => write!(f, "hmerge"),
770            Token::I1 => write!(f, "i1"),
771            Token::I8 => write!(f, "i8"),
772            Token::I16 => write!(f, "i16"),
773            Token::I32 => write!(f, "i32"),
774            Token::I64 => write!(f, "i64"),
775            Token::I128 => write!(f, "i128"),
776            Token::If => write!(f, "if"),
777            Token::ILog2 => write!(f, "ilog2"),
778            Token::Inv => write!(f, "inv"),
779            Token::IsOdd => write!(f, "is_odd"),
780            Token::Local => write!(f, "local"),
781            Token::Locaddr => write!(f, "locaddr"),
782            Token::LocLoad => write!(f, "loc_load"),
783            Token::LocLoadw => write!(f, "loc_loadw"),
784            Token::LocLoadwBe => write!(f, "loc_loadw_be"),
785            Token::LocLoadwLe => write!(f, "loc_loadw_le"),
786            Token::LocStore => write!(f, "loc_store"),
787            Token::LocStorew => write!(f, "loc_storew"),
788            Token::LocStorewBe => write!(f, "loc_storew_be"),
789            Token::LocStorewLe => write!(f, "loc_storew_le"),
790            Token::Lt => write!(f, "lt"),
791            Token::Lte => write!(f, "lte"),
792            Token::Mem => write!(f, "mem"),
793            Token::MemLoad => write!(f, "mem_load"),
794            Token::MemLoadw => write!(f, "mem_loadw"),
795            Token::MemLoadwBe => write!(f, "mem_loadw_be"),
796            Token::MemLoadwLe => write!(f, "mem_loadw_le"),
797            Token::MemStore => write!(f, "mem_store"),
798            Token::MemStorew => write!(f, "mem_storew"),
799            Token::MemStorewBe => write!(f, "mem_storew_be"),
800            Token::MemStorewLe => write!(f, "mem_storew_le"),
801            Token::MemStream => write!(f, "mem_stream"),
802            Token::Movdn => write!(f, "movdn"),
803            Token::Movdnw => write!(f, "movdnw"),
804            Token::Movup => write!(f, "movup"),
805            Token::Movupw => write!(f, "movupw"),
806            Token::MtreeGet => write!(f, "mtree_get"),
807            Token::MtreeMerge => write!(f, "mtree_merge"),
808            Token::MtreeSet => write!(f, "mtree_set"),
809            Token::MtreeVerify => write!(f, "mtree_verify"),
810            Token::Mul => write!(f, "mul"),
811            Token::Neg => write!(f, "neg"),
812            Token::Neq => write!(f, "neq"),
813            Token::Not => write!(f, "not"),
814            Token::Nop => write!(f, "nop"),
815            Token::Or => write!(f, "or"),
816            Token::Padw => write!(f, "padw"),
817            Token::Pow2 => write!(f, "pow2"),
818            Token::Proc => write!(f, "proc"),
819            Token::Procref => write!(f, "procref"),
820            Token::Ptr => write!(f, "ptr"),
821            Token::Pub => write!(f, "pub"),
822            Token::Push => write!(f, "push"),
823            Token::HornerBase => write!(f, "horner_eval_base"),
824            Token::HornerExt => write!(f, "horner_eval_ext"),
825            Token::LogPrecompile => write!(f, "log_precompile"),
826            Token::Repeat => write!(f, "repeat"),
827            Token::Reversew => write!(f, "reversew"),
828            Token::Reversedw => write!(f, "reversedw"),
829            Token::Sdepth => write!(f, "sdepth"),
830            Token::Stack => write!(f, "stack"),
831            Token::Struct => write!(f, "struct"),
832            Token::Sub => write!(f, "sub"),
833            Token::Swap => write!(f, "swap"),
834            Token::Swapw => write!(f, "swapw"),
835            Token::Swapdw => write!(f, "swapdw"),
836            Token::Syscall => write!(f, "syscall"),
837            Token::Trace => write!(f, "trace"),
838            Token::True => write!(f, "true"),
839            Token::Type => write!(f, "type"),
840            Token::Use => write!(f, "use"),
841            Token::U8 => write!(f, "u8"),
842            Token::U16 => write!(f, "u16"),
843            Token::U32 => write!(f, "u32"),
844            Token::U32And => write!(f, "u32and"),
845            Token::U32Assert => write!(f, "u32assert"),
846            Token::U32Assert2 => write!(f, "u32assert2"),
847            Token::U32Assertw => write!(f, "u32assertw"),
848            Token::U32Cast => write!(f, "u32cast"),
849            Token::U32Div => write!(f, "u32div"),
850            Token::U32Divmod => write!(f, "u32divmod"),
851            Token::U32Gt => write!(f, "u32gt"),
852            Token::U32Gte => write!(f, "u32gte"),
853            Token::U32Lt => write!(f, "u32lt"),
854            Token::U32Lte => write!(f, "u32lte"),
855            Token::U32Max => write!(f, "u32max"),
856            Token::U32Min => write!(f, "u32min"),
857            Token::U32Mod => write!(f, "u32mod"),
858            Token::U32Not => write!(f, "u32not"),
859            Token::U32Or => write!(f, "u32or"),
860            Token::U32OverflowingAdd => write!(f, "u32overflowing_add"),
861            Token::U32OverflowingAdd3 => write!(f, "u32overflowing_add3"),
862            Token::U32WideningAdd => write!(f, "u32widening_add"),
863            Token::U32WideningAdd3 => write!(f, "u32widening_add3"),
864            Token::U32WideningMadd => write!(f, "u32widening_madd"),
865            Token::U32WideningMul => write!(f, "u32widening_mul"),
866            Token::U32OverflowingSub => write!(f, "u32overflowing_sub"),
867            Token::U32Popcnt => write!(f, "u32popcnt"),
868            Token::U32Clz => write!(f, "u32clz"),
869            Token::U32Ctz => write!(f, "u32ctz"),
870            Token::U32Clo => write!(f, "u32clo"),
871            Token::U32Cto => write!(f, "u32cto"),
872            Token::U32Rotl => write!(f, "u32rotl"),
873            Token::U32Rotr => write!(f, "u32rotr"),
874            Token::U32Shl => write!(f, "u32shl"),
875            Token::U32Shr => write!(f, "u32shr"),
876            Token::U32Split => write!(f, "u32split"),
877            Token::U32Test => write!(f, "u32test"),
878            Token::U32Testw => write!(f, "u32testw"),
879            Token::U32WrappingAdd => write!(f, "u32wrapping_add"),
880            Token::U32WrappingAdd3 => write!(f, "u32wrapping_add3"),
881            Token::U32WrappingMadd => write!(f, "u32wrapping_madd"),
882            Token::U32WrappingMul => write!(f, "u32wrapping_mul"),
883            Token::U32WrappingSub => write!(f, "u32wrapping_sub"),
884            Token::U32Xor => write!(f, "u32xor"),
885            Token::U64 => write!(f, "u64"),
886            Token::U128 => write!(f, "u128"),
887            Token::While => write!(f, "while"),
888            Token::Word => write!(f, "word"),
889            Token::Event => write!(f, "event"),
890            Token::Xor => write!(f, "xor"),
891            Token::At => write!(f, "@"),
892            Token::Bang => write!(f, "!"),
893            Token::Colon => write!(f, ":"),
894            Token::ColonColon => write!(f, "::"),
895            Token::Dot => write!(f, "."),
896            Token::Comma => write!(f, ","),
897            Token::Equal => write!(f, "="),
898            Token::Langle => write!(f, "<"),
899            Token::Lparen => write!(f, "("),
900            Token::Lbrace => write!(f, "{{"),
901            Token::Lbracket => write!(f, "["),
902            Token::Minus => write!(f, "-"),
903            Token::Plus => write!(f, "+"),
904            Token::Semicolon => write!(f, ";"),
905            Token::SlashSlash => write!(f, "//"),
906            Token::Slash => write!(f, "/"),
907            Token::Star => write!(f, "*"),
908            Token::Rangle => write!(f, ">"),
909            Token::Rparen => write!(f, ")"),
910            Token::Rbrace => write!(f, "}}"),
911            Token::Rbracket => write!(f, "]"),
912            Token::Rstab => write!(f, "->"),
913            Token::Range => write!(f, ".."),
914            Token::DocComment(DocumentationType::Module(_)) => f.write_str("module doc"),
915            Token::DocComment(DocumentationType::Form(_)) => f.write_str("doc comment"),
916            Token::HexValue(_) => f.write_str("hex-encoded value"),
917            Token::HexWord(_) => f.write_str("hex-encoded word"),
918            Token::BinValue(_) => f.write_str("bin-encoded value"),
919            Token::Int(_) => f.write_str("integer"),
920            Token::Ident(_) => f.write_str("identifier"),
921            Token::ConstantIdent(_) => f.write_str("constant identifier"),
922            Token::QuotedIdent(_) => f.write_str("quoted identifier"),
923            Token::QuotedString(_) => f.write_str("quoted string"),
924            Token::Comment => f.write_str("comment"),
925            Token::Eof => write!(f, "end of file"),
926        }
927    }
928}
929
930impl<'input> Token<'input> {
931    /// Returns true if this token represents the name of an instruction.
932    ///
933    /// This is used to simplify diagnostic output related to expected tokens so as not to
934    /// overwhelm the user with a ton of possible expected instruction variants.
935    pub fn is_instruction(&self) -> bool {
936        matches!(
937            self,
938            Token::Add
939                | Token::Adv
940                | Token::InsertHdword
941                | Token::InsertHdwordWithDomain
942                | Token::InsertHqword
943                | Token::InsertHperm
944                | Token::InsertMem
945                | Token::AdvLoadw
946                | Token::AdvPipe
947                | Token::AdvPush
948                | Token::AdvStack
949                | Token::PushMapval
950                | Token::PushMapvalCount
951                | Token::PushMapvaln
952                | Token::PushMtnode
953                | Token::And
954                | Token::Assert
955                | Token::Assertz
956                | Token::AssertEq
957                | Token::AssertEqw
958                | Token::EvalCircuit
959                | Token::Caller
960                | Token::Call
961                | Token::Cdrop
962                | Token::Cdropw
963                | Token::Clk
964                | Token::CryptoStream
965                | Token::Cswap
966                | Token::Cswapw
967                | Token::Debug
968                | Token::Div
969                | Token::Drop
970                | Token::Dropw
971                | Token::Dup
972                | Token::Dupw
973                | Token::Dynexec
974                | Token::Dyncall
975                | Token::Emit
976                | Token::Eq
977                | Token::Eqw
978                | Token::Ext2Add
979                | Token::Ext2Div
980                | Token::Ext2Inv
981                | Token::Ext2Mul
982                | Token::Ext2Neg
983                | Token::Ext2Sub
984                | Token::Exec
985                | Token::Exp
986                | Token::ExpU
987                | Token::FriExt2Fold4
988                | Token::Gt
989                | Token::Gte
990                | Token::Hash
991                | Token::Hperm
992                | Token::Hmerge
993                | Token::HornerBase
994                | Token::HornerExt
995                | Token::LogPrecompile
996                | Token::ILog2
997                | Token::Inv
998                | Token::IsOdd
999                | Token::Local
1000                | Token::Locaddr
1001                | Token::LocLoad
1002                | Token::LocLoadw
1003                | Token::LocLoadwBe
1004                | Token::LocLoadwLe
1005                | Token::LocStore
1006                | Token::LocStorew
1007                | Token::LocStorewBe
1008                | Token::LocStorewLe
1009                | Token::Lt
1010                | Token::Lte
1011                | Token::Mem
1012                | Token::MemLoad
1013                | Token::MemLoadw
1014                | Token::MemLoadwBe
1015                | Token::MemLoadwLe
1016                | Token::MemStore
1017                | Token::MemStorew
1018                | Token::MemStorewBe
1019                | Token::MemStorewLe
1020                | Token::MemStream
1021                | Token::Movdn
1022                | Token::Movdnw
1023                | Token::Movup
1024                | Token::Movupw
1025                | Token::MtreeGet
1026                | Token::MtreeMerge
1027                | Token::MtreeSet
1028                | Token::MtreeVerify
1029                | Token::Mul
1030                | Token::Neg
1031                | Token::Neq
1032                | Token::Not
1033                | Token::Nop
1034                | Token::Or
1035                | Token::Padw
1036                | Token::Pow2
1037                | Token::Procref
1038                | Token::Push
1039                | Token::Repeat
1040                | Token::Reversew
1041                | Token::Reversedw
1042                | Token::Sdepth
1043                | Token::Stack
1044                | Token::Sub
1045                | Token::Swap
1046                | Token::Swapw
1047                | Token::Swapdw
1048                | Token::Syscall
1049                | Token::Trace
1050                | Token::U32And
1051                | Token::U32Assert
1052                | Token::U32Assert2
1053                | Token::U32Assertw
1054                | Token::U32Cast
1055                | Token::U32Div
1056                | Token::U32Divmod
1057                | Token::U32Gt
1058                | Token::U32Gte
1059                | Token::U32Lt
1060                | Token::U32Lte
1061                | Token::U32Max
1062                | Token::U32Min
1063                | Token::U32Mod
1064                | Token::U32Not
1065                | Token::U32Or
1066                | Token::U32OverflowingAdd
1067                | Token::U32OverflowingAdd3
1068                | Token::U32WideningAdd
1069                | Token::U32WideningAdd3
1070                | Token::U32WideningMadd
1071                | Token::U32WideningMul
1072                | Token::U32OverflowingSub
1073                | Token::U32Popcnt
1074                | Token::U32Clz
1075                | Token::U32Ctz
1076                | Token::U32Clo
1077                | Token::U32Cto
1078                | Token::U32Rotl
1079                | Token::U32Rotr
1080                | Token::U32Shl
1081                | Token::U32Shr
1082                | Token::U32Split
1083                | Token::U32Test
1084                | Token::U32Testw
1085                | Token::U32WrappingAdd
1086                | Token::U32WrappingAdd3
1087                | Token::U32WrappingMadd
1088                | Token::U32WrappingMul
1089                | Token::U32WrappingSub
1090                | Token::U32Xor
1091                | Token::Xor
1092        )
1093    }
1094
1095    /// Returns true if this token represents the name of an type or a type-related keyword.
1096    ///
1097    /// This is used to simplify diagnostic output related to expected tokens so as not to
1098    /// overwhelm the user with a ton of possible expected tokens.
1099    pub fn is_type_keyword(&self) -> bool {
1100        matches!(
1101            self,
1102            Token::Addrspace
1103                | Token::Ptr
1104                | Token::I1
1105                | Token::I8
1106                | Token::I16
1107                | Token::I32
1108                | Token::I64
1109                | Token::I128
1110                | Token::U8
1111                | Token::U16
1112                | Token::U32
1113                | Token::U64
1114                | Token::U128
1115                | Token::Felt
1116                | Token::Word
1117                | Token::Struct
1118        )
1119    }
1120
1121    const KEYWORDS: &'static [(&'static str, Token<'static>)] = &[
1122        ("add", Token::Add),
1123        ("addrspace", Token::Addrspace),
1124        ("adv", Token::Adv),
1125        ("adv_map", Token::AdvMap),
1126        ("eval_circuit", Token::EvalCircuit),
1127        ("insert_hdword", Token::InsertHdword),
1128        ("insert_hdword_d", Token::InsertHdwordWithDomain),
1129        ("insert_hqword", Token::InsertHqword),
1130        ("insert_hperm", Token::InsertHperm),
1131        ("insert_mem", Token::InsertMem),
1132        ("adv_loadw", Token::AdvLoadw),
1133        ("adv_pipe", Token::AdvPipe),
1134        ("adv_push", Token::AdvPush),
1135        ("adv_stack", Token::AdvStack),
1136        ("push_mapval", Token::PushMapval),
1137        ("push_mapval_count", Token::PushMapvalCount),
1138        ("push_mapvaln", Token::PushMapvaln),
1139        ("push_mtnode", Token::PushMtnode),
1140        ("and", Token::And),
1141        ("assert", Token::Assert),
1142        ("assertz", Token::Assertz),
1143        ("assert_eq", Token::AssertEq),
1144        ("assert_eqw", Token::AssertEqw),
1145        ("begin", Token::Begin),
1146        ("byte", Token::Byte),
1147        ("caller", Token::Caller),
1148        ("call", Token::Call),
1149        ("cdrop", Token::Cdrop),
1150        ("cdropw", Token::Cdropw),
1151        ("clk", Token::Clk),
1152        ("const", Token::Const),
1153        ("crypto_stream", Token::CryptoStream),
1154        ("cswap", Token::Cswap),
1155        ("cswapw", Token::Cswapw),
1156        ("debug", Token::Debug),
1157        ("div", Token::Div),
1158        ("drop", Token::Drop),
1159        ("dropw", Token::Dropw),
1160        ("dup", Token::Dup),
1161        ("dupw", Token::Dupw),
1162        ("dynexec", Token::Dynexec),
1163        ("dyncall", Token::Dyncall),
1164        ("else", Token::Else),
1165        ("emit", Token::Emit),
1166        ("end", Token::End),
1167        ("enum", Token::Enum),
1168        ("eq", Token::Eq),
1169        ("eqw", Token::Eqw),
1170        ("ext2add", Token::Ext2Add),
1171        ("ext2div", Token::Ext2Div),
1172        ("ext2inv", Token::Ext2Inv),
1173        ("ext2mul", Token::Ext2Mul),
1174        ("ext2neg", Token::Ext2Neg),
1175        ("ext2sub", Token::Ext2Sub),
1176        ("err", Token::Err),
1177        ("exec", Token::Exec),
1178        ("exp", Token::Exp),
1179        ("exp.u", Token::ExpU),
1180        ("export", Token::Export),
1181        ("false", Token::False),
1182        ("felt", Token::Felt),
1183        ("fri_ext2fold4", Token::FriExt2Fold4),
1184        ("gt", Token::Gt),
1185        ("gte", Token::Gte),
1186        ("hash", Token::Hash),
1187        ("has_mapkey", Token::HasMapkey),
1188        ("hperm", Token::Hperm),
1189        ("hmerge", Token::Hmerge),
1190        ("i1", Token::I1),
1191        ("i8", Token::I8),
1192        ("i16", Token::I16),
1193        ("i32", Token::I32),
1194        ("i64", Token::I64),
1195        ("i128", Token::I128),
1196        ("if", Token::If),
1197        ("ilog2", Token::ILog2),
1198        ("inv", Token::Inv),
1199        ("is_odd", Token::IsOdd),
1200        ("local", Token::Local),
1201        ("locaddr", Token::Locaddr),
1202        ("loc_load", Token::LocLoad),
1203        ("loc_loadw", Token::LocLoadw),
1204        ("loc_loadw_be", Token::LocLoadwBe),
1205        ("loc_loadw_le", Token::LocLoadwLe),
1206        ("loc_store", Token::LocStore),
1207        ("loc_storew", Token::LocStorew),
1208        ("loc_storew_be", Token::LocStorewBe),
1209        ("loc_storew_le", Token::LocStorewLe),
1210        ("lt", Token::Lt),
1211        ("lte", Token::Lte),
1212        ("mem", Token::Mem),
1213        ("mem_load", Token::MemLoad),
1214        ("mem_loadw", Token::MemLoadw),
1215        ("mem_loadw_be", Token::MemLoadwBe),
1216        ("mem_loadw_le", Token::MemLoadwLe),
1217        ("mem_store", Token::MemStore),
1218        ("mem_storew", Token::MemStorew),
1219        ("mem_storew_be", Token::MemStorewBe),
1220        ("mem_storew_le", Token::MemStorewLe),
1221        ("mem_stream", Token::MemStream),
1222        ("movdn", Token::Movdn),
1223        ("movdnw", Token::Movdnw),
1224        ("movup", Token::Movup),
1225        ("movupw", Token::Movupw),
1226        ("mtree_get", Token::MtreeGet),
1227        ("mtree_merge", Token::MtreeMerge),
1228        ("mtree_set", Token::MtreeSet),
1229        ("mtree_verify", Token::MtreeVerify),
1230        ("mul", Token::Mul),
1231        ("neg", Token::Neg),
1232        ("neq", Token::Neq),
1233        ("not", Token::Not),
1234        ("nop", Token::Nop),
1235        ("or", Token::Or),
1236        ("padw", Token::Padw),
1237        ("pow2", Token::Pow2),
1238        ("proc", Token::Proc),
1239        ("procref", Token::Procref),
1240        ("ptr", Token::Ptr),
1241        ("push", Token::Push),
1242        ("pub", Token::Pub),
1243        ("horner_eval_base", Token::HornerBase),
1244        ("horner_eval_ext", Token::HornerExt),
1245        ("log_precompile", Token::LogPrecompile),
1246        ("repeat", Token::Repeat),
1247        ("reversew", Token::Reversew),
1248        ("reversedw", Token::Reversedw),
1249        ("sdepth", Token::Sdepth),
1250        ("stack", Token::Stack),
1251        ("struct", Token::Struct),
1252        ("sub", Token::Sub),
1253        ("swap", Token::Swap),
1254        ("swapw", Token::Swapw),
1255        ("swapdw", Token::Swapdw),
1256        ("syscall", Token::Syscall),
1257        ("trace", Token::Trace),
1258        ("true", Token::True),
1259        ("type", Token::Type),
1260        ("use", Token::Use),
1261        ("u8", Token::U8),
1262        ("u16", Token::U16),
1263        ("u32", Token::U32),
1264        ("u32and", Token::U32And),
1265        ("u32assert", Token::U32Assert),
1266        ("u32assert2", Token::U32Assert2),
1267        ("u32assertw", Token::U32Assertw),
1268        ("u32cast", Token::U32Cast),
1269        ("u32div", Token::U32Div),
1270        ("u32divmod", Token::U32Divmod),
1271        ("u32gt", Token::U32Gt),
1272        ("u32gte", Token::U32Gte),
1273        ("u32lt", Token::U32Lt),
1274        ("u32lte", Token::U32Lte),
1275        ("u32max", Token::U32Max),
1276        ("u32min", Token::U32Min),
1277        ("u32mod", Token::U32Mod),
1278        ("u32not", Token::U32Not),
1279        ("u32or", Token::U32Or),
1280        ("u32overflowing_add", Token::U32OverflowingAdd),
1281        ("u32overflowing_add3", Token::U32OverflowingAdd3),
1282        ("u32widening_add", Token::U32WideningAdd),
1283        ("u32widening_add3", Token::U32WideningAdd3),
1284        ("u32widening_madd", Token::U32WideningMadd),
1285        ("u32widening_mul", Token::U32WideningMul),
1286        ("u32overflowing_sub", Token::U32OverflowingSub),
1287        ("u32popcnt", Token::U32Popcnt),
1288        ("u32clz", Token::U32Clz),
1289        ("u32ctz", Token::U32Ctz),
1290        ("u32clo", Token::U32Clo),
1291        ("u32cto", Token::U32Cto),
1292        ("u32rotl", Token::U32Rotl),
1293        ("u32rotr", Token::U32Rotr),
1294        ("u32shl", Token::U32Shl),
1295        ("u32shr", Token::U32Shr),
1296        ("u32split", Token::U32Split),
1297        ("u32test", Token::U32Test),
1298        ("u32testw", Token::U32Testw),
1299        ("u32wrapping_add", Token::U32WrappingAdd),
1300        ("u32wrapping_add3", Token::U32WrappingAdd3),
1301        ("u32wrapping_madd", Token::U32WrappingMadd),
1302        ("u32wrapping_mul", Token::U32WrappingMul),
1303        ("u32wrapping_sub", Token::U32WrappingSub),
1304        ("u32xor", Token::U32Xor),
1305        ("u64", Token::U64),
1306        ("u128", Token::U128),
1307        ("while", Token::While),
1308        ("word", Token::Word),
1309        ("event", Token::Event),
1310        ("xor", Token::Xor),
1311    ];
1312
1313    /// Constructs a DFA capable of recognizing Miden Assembly keywords.
1314    ///
1315    /// Constructing the state machine is expensive, so it should not be done in hot code. Instead,
1316    /// prefer to construct it once and reuse it many times.
1317    ///
1318    /// Currently we construct an instance of this searcher in the lexer, which is then used to
1319    /// select a keyword token or construct an identifier token depending on whether a given string
1320    /// is a known keyword.
1321    pub fn keyword_searcher() -> aho_corasick::AhoCorasick {
1322        use aho_corasick::AhoCorasick;
1323
1324        // Execute a search for any of the keywords above, matching longest first, and requiring
1325        // the match to cover the entire input.
1326        AhoCorasick::builder()
1327            .match_kind(aho_corasick::MatchKind::LeftmostLongest)
1328            .start_kind(aho_corasick::StartKind::Anchored)
1329            .build(Self::KEYWORDS.iter().map(|(kw, _)| kw).copied())
1330            .expect("unable to build aho-corasick searcher for token")
1331    }
1332
1333    /// Returns an appropriate [Token] depending on whether the given string is a keyword or an
1334    /// identifier.
1335    ///
1336    /// NOTE: This constructs and throws away an expensive-to-construct Aho-Corasick state machine.
1337    /// You should not call this from any code on a hot path. Instead, construct the state machine
1338    /// once using [Token::keyword_searcher], and reuse it for all searches using
1339    /// [Token::from_keyword_or_ident_with_searcher].
1340    ///
1341    /// Currently, this function is only called along one code path, which is when we are
1342    /// constructing a parser error in which we wish to determine which, if any, of the expected
1343    /// tokens are instruction opcode keywords, so we can collapse them into a more user-friendly
1344    /// error message. This is not on a hot path, so we don't care if it is a bit slow.
1345    pub fn from_keyword_or_ident(s: &'input str) -> Self {
1346        let searcher = Self::keyword_searcher();
1347        Self::from_keyword_or_ident_with_searcher(s, &searcher)
1348    }
1349
1350    /// This is the primary function you should use when you wish to get an appropriate token for
1351    /// a given input string, depending on whether it is a keyword or an identifier.
1352    ///
1353    /// See [Token::keyword_searcher] for additional information on how this is meant to be used.
1354    pub fn from_keyword_or_ident_with_searcher(
1355        s: &'input str,
1356        searcher: &aho_corasick::AhoCorasick,
1357    ) -> Self {
1358        let input = aho_corasick::Input::new(s).anchored(aho_corasick::Anchored::Yes);
1359        match searcher.find(input) {
1360            // No match, it's an ident
1361            None => Token::Ident(s),
1362            // If the match is not exact, it's an ident
1363            Some(matched) if matched.len() != s.len() => Token::Ident(s),
1364            // Otherwise clone the Token corresponding to the keyword that was matched
1365            Some(matched) => Self::KEYWORDS[matched.pattern().as_usize()].1.clone(),
1366        }
1367    }
1368
1369    /// Parses a [Token] from a string corresponding to that token.
1370    ///
1371    /// This solely exists to aid in constructing more user-friendly error messages in certain
1372    /// scenarios, and is otherwise not used (nor should it be). It is quite expensive to call due
1373    /// to invoking [Token::keyword_searcher] under the covers. See the documentation for that
1374    /// function for more details.
1375    pub fn parse(s: &'input str) -> Option<Token<'input>> {
1376        match Token::from_keyword_or_ident(s) {
1377            Token::Ident(_) => {
1378                // Nope, try again
1379                match s {
1380                    "@" => Some(Token::At),
1381                    "!" => Some(Token::Bang),
1382                    ":" => Some(Token::Colon),
1383                    "::" => Some(Token::ColonColon),
1384                    "." => Some(Token::Dot),
1385                    "," => Some(Token::Comma),
1386                    "=" => Some(Token::Equal),
1387                    "<" => Some(Token::Langle),
1388                    "(" => Some(Token::Lparen),
1389                    "{" => Some(Token::Lbrace),
1390                    "[" => Some(Token::Lbracket),
1391                    "-" => Some(Token::Minus),
1392                    "+" => Some(Token::Plus),
1393                    ";" => Some(Token::Semicolon),
1394                    "//" => Some(Token::SlashSlash),
1395                    "/" => Some(Token::Slash),
1396                    "*" => Some(Token::Star),
1397                    ">" => Some(Token::Rangle),
1398                    ")" => Some(Token::Rparen),
1399                    "}" => Some(Token::Rbrace),
1400                    "]" => Some(Token::Rbracket),
1401                    "->" => Some(Token::Rstab),
1402                    ".." => Some(Token::Range),
1403                    "end of file" => Some(Token::Eof),
1404                    "module doc" => {
1405                        Some(Token::DocComment(DocumentationType::Module(String::new())))
1406                    },
1407                    "doc comment" => {
1408                        Some(Token::DocComment(DocumentationType::Form(String::new())))
1409                    },
1410                    "comment" => Some(Token::Comment),
1411                    "hex-encoded value" => Some(Token::HexValue(IntValue::U8(0))),
1412                    "hex-encoded word" => Some(Token::HexWord(WordValue([Felt::ZERO; 4]))),
1413                    "bin-encoded value" => Some(Token::BinValue(BinEncodedValue::U8(0))),
1414                    "integer" => Some(Token::Int(0)),
1415                    "identifier" => Some(Token::Ident("")),
1416                    "constant identifier" => Some(Token::ConstantIdent("")),
1417                    "quoted identifier" => Some(Token::QuotedIdent("")),
1418                    "quoted string" => Some(Token::QuotedString("")),
1419                    _ => None,
1420                }
1421            },
1422            // We matched a keyword
1423            token => Some(token),
1424        }
1425    }
1426}