Skip to main content

miden_assembly_syntax/parser/
token.rs

1use alloc::string::String;
2use core::fmt;
3
4use miden_core::{
5    Felt,
6    field::PrimeField64,
7    serde::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable},
8};
9#[cfg(feature = "serde")]
10use serde::{Deserialize, Serialize};
11
12// DOCUMENTATION TYPE
13// ================================================================================================
14
/// Represents the scope of a given documentation comment.
///
/// Both variants carry the text of the comment as an owned `String`; the variant only records
/// what the comment is attached to.
#[derive(Debug, Clone)]
pub enum DocumentationType {
    /// Documentation for a module (a token holding this variant displays as "module doc").
    Module(String),
    /// Documentation for a form (a token holding this variant displays as "doc comment").
    // NOTE(review): "form" presumably means an individual item such as a procedure or constant;
    // confirm against the parser.
    Form(String),
}
21
22impl From<DocumentationType> for String {
23    fn from(doc: DocumentationType) -> Self {
24        match doc {
25            DocumentationType::Module(s) => s,
26            DocumentationType::Form(s) => s,
27        }
28    }
29}
30
31impl core::ops::Deref for DocumentationType {
32    type Target = String;
33    fn deref(&self) -> &Self::Target {
34        match self {
35            Self::Module(s) => s,
36            Self::Form(s) => s,
37        }
38    }
39}
40
41// PUSH VALUE
42// ================================================================================================
43
/// A value that is either a single integer ([`IntValue`]) or a word ([`WordValue`]).
// NOTE(review): presumably the immediate operand of a `push` instruction; confirm against the
// grammar that constructs it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PushValue {
    /// A single integer value.
    Int(IntValue),
    /// A word of four field elements.
    Word(WordValue),
}
49
50impl From<u8> for PushValue {
51    fn from(value: u8) -> Self {
52        Self::Int(value.into())
53    }
54}
55
56impl From<u16> for PushValue {
57    fn from(value: u16) -> Self {
58        Self::Int(value.into())
59    }
60}
61
62impl From<u32> for PushValue {
63    fn from(value: u32) -> Self {
64        Self::Int(value.into())
65    }
66}
67
68impl From<Felt> for PushValue {
69    fn from(value: Felt) -> Self {
70        Self::Int(value.into())
71    }
72}
73
74impl From<IntValue> for PushValue {
75    fn from(value: IntValue) -> Self {
76        Self::Int(value)
77    }
78}
79
80impl From<WordValue> for PushValue {
81    fn from(value: WordValue) -> Self {
82        Self::Word(value)
83    }
84}
85
86impl fmt::Display for PushValue {
87    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88        match self {
89            Self::Int(value) => fmt::Display::fmt(value, f),
90            Self::Word(value) => fmt::Display::fmt(value, f),
91        }
92    }
93}
94
95impl crate::prettier::PrettyPrint for PushValue {
96    fn render(&self) -> crate::prettier::Document {
97        match self {
98            Self::Int(value) => value.render(),
99            Self::Word(value) => value.render(),
100        }
101    }
102}
103
104// WORD VALUE
105// ================================================================================================
106
/// A word: four field elements stored in a fixed-size array.
///
/// With the `serde` feature enabled the type is (de)serialized transparently, i.e. exactly like
/// the inner array.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(transparent))]
#[cfg_attr(
    all(feature = "arbitrary", test),
    miden_test_serde_macros::serde_test(binary_serde(true))
)]
pub struct WordValue(pub [Felt; 4]);
115
116impl fmt::Display for WordValue {
117    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
118        let mut builder = f.debug_list();
119        for value in self.0 {
120            builder.entry(&value.as_canonical_u64());
121        }
122        builder.finish()
123    }
124}
125
126impl crate::prettier::PrettyPrint for WordValue {
127    fn render(&self) -> crate::prettier::Document {
128        use crate::prettier::*;
129
130        const_text("[")
131            + self
132                .0
133                .iter()
134                .copied()
135                .map(display)
136                .reduce(|acc, doc| acc + const_text(",") + doc)
137                .unwrap_or_default()
138            + const_text("]")
139    }
140}
141
142impl PartialOrd for WordValue {
143    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
144        Some(self.cmp(other))
145    }
146}
147impl Ord for WordValue {
148    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
149        let (WordValue([l0, l1, l2, l3]), WordValue([r0, r1, r2, r3])) = (self, other);
150        l0.as_canonical_u64()
151            .cmp(&r0.as_canonical_u64())
152            .then_with(|| l1.as_canonical_u64().cmp(&r1.as_canonical_u64()))
153            .then_with(|| l2.as_canonical_u64().cmp(&r2.as_canonical_u64()))
154            .then_with(|| l3.as_canonical_u64().cmp(&r3.as_canonical_u64()))
155    }
156}
157
158impl core::hash::Hash for WordValue {
159    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
160        let WordValue([a, b, c, d]) = self;
161        [
162            a.as_canonical_u64(),
163            b.as_canonical_u64(),
164            c.as_canonical_u64(),
165            d.as_canonical_u64(),
166        ]
167        .hash(state)
168    }
169}
170
#[cfg(feature = "arbitrary")]
impl proptest::arbitrary::Arbitrary for WordValue {
    type Parameters = ();
    type Strategy = proptest::prelude::BoxedStrategy<Self>;

    /// Generates words whose four elements are each drawn from `0..crate::FIELD_MODULUS`.
    fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
        use proptest::{array::uniform4, strategy::Strategy};

        let element = (0..crate::FIELD_MODULUS).prop_map(Felt::new_unchecked);
        // Pure random values, no meaningful shrinking pattern
        uniform4(element).prop_map(WordValue).no_shrink().boxed()
    }
}
185
186impl Serializable for WordValue {
187    fn write_into<W: ByteWriter>(&self, target: &mut W) {
188        self.0[0].write_into(target);
189        self.0[1].write_into(target);
190        self.0[2].write_into(target);
191        self.0[3].write_into(target);
192    }
193}
194
195impl Deserializable for WordValue {
196    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
197        let a = Felt::read_from(source)?;
198        let b = Felt::read_from(source)?;
199        let c = Felt::read_from(source)?;
200        let d = Felt::read_from(source)?;
201        Ok(Self([a, b, c, d]))
202    }
203}
204
205// INT VALUE
206// ================================================================================================
207
/// Represents one of the various types of values that have a hex-encoded representation in Miden
/// Assembly source files.
///
/// The variant records the width at which the number is stored. Equality, ordering and hashing
/// in this file all take the variant into account as well as the number, so `U8(1)` and `U16(1)`
/// are distinct values.
///
/// With the `serde` feature enabled the enum is (de)serialized untagged, i.e. as the bare payload
/// without a variant marker.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(untagged))]
#[cfg_attr(
    all(feature = "arbitrary", test),
    miden_test_serde_macros::serde_test(binary_serde(true))
)]
pub enum IntValue {
    /// A tiny value
    U8(u8),
    /// A small value
    U16(u16),
    /// A u32 constant, typically represents a memory address
    U32(u32),
    /// A single field element, 8 bytes, encoded as 16 hex digits
    Felt(Felt),
}
227
228impl From<u8> for IntValue {
229    fn from(value: u8) -> Self {
230        Self::U8(value)
231    }
232}
233
234impl From<u16> for IntValue {
235    fn from(value: u16) -> Self {
236        Self::U16(value)
237    }
238}
239
240impl From<u32> for IntValue {
241    fn from(value: u32) -> Self {
242        Self::U32(value)
243    }
244}
245
246impl From<Felt> for IntValue {
247    fn from(value: Felt) -> Self {
248        Self::Felt(value)
249    }
250}
251
252impl IntValue {
253    pub fn as_int(&self) -> u64 {
254        match self {
255            Self::U8(value) => *value as u64,
256            Self::U16(value) => *value as u64,
257            Self::U32(value) => *value as u64,
258            Self::Felt(value) => value.as_canonical_u64(),
259        }
260    }
261
262    /// Returns the value as a `u64`.
263    ///
264    /// This is an alias for [`as_int`](Self::as_int) that matches the `Felt` API,
265    /// allowing the generated grammar code to use a consistent method name.
266    pub fn as_canonical_u64(&self) -> u64 {
267        self.as_int()
268    }
269
270    pub fn checked_add(&self, rhs: Self) -> Option<Self> {
271        let value = self.as_int().checked_add(rhs.as_int())?;
272        if value >= crate::FIELD_MODULUS {
273            return None;
274        }
275        Some(super::lexer::shrink_u64_hex(value))
276    }
277
278    pub fn checked_sub(&self, rhs: Self) -> Option<Self> {
279        let value = self.as_int().checked_sub(rhs.as_int())?;
280        if value >= crate::FIELD_MODULUS {
281            return None;
282        }
283        Some(super::lexer::shrink_u64_hex(value))
284    }
285
286    pub fn checked_mul(&self, rhs: Self) -> Option<Self> {
287        let value = self.as_int().checked_mul(rhs.as_int())?;
288        if value >= crate::FIELD_MODULUS {
289            return None;
290        }
291        Some(super::lexer::shrink_u64_hex(value))
292    }
293
294    pub fn checked_div(&self, rhs: Self) -> Option<Self> {
295        let value = self.as_int().checked_div(rhs.as_int())?;
296        if value >= crate::FIELD_MODULUS {
297            return None;
298        }
299        Some(super::lexer::shrink_u64_hex(value))
300    }
301}
302
303impl core::ops::Add<IntValue> for IntValue {
304    type Output = IntValue;
305
306    fn add(self, rhs: IntValue) -> Self::Output {
307        super::lexer::shrink_u64_hex(self.as_int() + rhs.as_int())
308    }
309}
310
311impl core::ops::Sub<IntValue> for IntValue {
312    type Output = IntValue;
313
314    fn sub(self, rhs: IntValue) -> Self::Output {
315        super::lexer::shrink_u64_hex(self.as_int() - rhs.as_int())
316    }
317}
318
319impl core::ops::Mul<IntValue> for IntValue {
320    type Output = IntValue;
321
322    fn mul(self, rhs: IntValue) -> Self::Output {
323        super::lexer::shrink_u64_hex(self.as_int() * rhs.as_int())
324    }
325}
326
327impl core::ops::Div<IntValue> for IntValue {
328    type Output = IntValue;
329
330    fn div(self, rhs: IntValue) -> Self::Output {
331        super::lexer::shrink_u64_hex(self.as_int() / rhs.as_int())
332    }
333}
334
335impl PartialEq<Felt> for IntValue {
336    fn eq(&self, other: &Felt) -> bool {
337        match self {
338            Self::U8(lhs) => (*lhs as u64) == other.as_canonical_u64(),
339            Self::U16(lhs) => (*lhs as u64) == other.as_canonical_u64(),
340            Self::U32(lhs) => (*lhs as u64) == other.as_canonical_u64(),
341            Self::Felt(lhs) => lhs == other,
342        }
343    }
344}
345
346impl fmt::Display for IntValue {
347    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
348        match self {
349            Self::U8(value) => write!(f, "{value}"),
350            Self::U16(value) => write!(f, "{value}"),
351            Self::U32(value) => write!(f, "{value:#04x}"),
352            Self::Felt(value) => write!(f, "{:#08x}", &value.as_canonical_u64().to_be()),
353        }
354    }
355}
356
357impl crate::prettier::PrettyPrint for IntValue {
358    fn render(&self) -> crate::prettier::Document {
359        match self {
360            Self::U8(v) => v.render(),
361            Self::U16(v) => v.render(),
362            Self::U32(v) => v.render(),
363            Self::Felt(v) => v.as_canonical_u64().render(),
364        }
365    }
366}
367
368impl PartialOrd for IntValue {
369    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
370        Some(self.cmp(other))
371    }
372}
373
374impl Ord for IntValue {
375    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
376        use core::cmp::Ordering;
377        match (self, other) {
378            (Self::U8(l), Self::U8(r)) => l.cmp(r),
379            (Self::U8(_), _) => Ordering::Less,
380            (Self::U16(_), Self::U8(_)) => Ordering::Greater,
381            (Self::U16(l), Self::U16(r)) => l.cmp(r),
382            (Self::U16(_), _) => Ordering::Less,
383            (Self::U32(_), Self::U8(_) | Self::U16(_)) => Ordering::Greater,
384            (Self::U32(l), Self::U32(r)) => l.cmp(r),
385            (Self::U32(_), _) => Ordering::Less,
386            (Self::Felt(_), Self::U8(_) | Self::U16(_) | Self::U32(_)) => Ordering::Greater,
387            (Self::Felt(l), Self::Felt(r)) => l.as_canonical_u64().cmp(&r.as_canonical_u64()),
388        }
389    }
390}
391
392impl core::hash::Hash for IntValue {
393    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
394        core::mem::discriminant(self).hash(state);
395        match self {
396            Self::U8(value) => value.hash(state),
397            Self::U16(value) => value.hash(state),
398            Self::U32(value) => value.hash(state),
399            Self::Felt(value) => value.as_canonical_u64().hash(state),
400        }
401    }
402}
403
404impl Serializable for IntValue {
405    fn write_into<W: ByteWriter>(&self, target: &mut W) {
406        self.as_int().write_into(target)
407    }
408}
409
410impl Deserializable for IntValue {
411    fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> {
412        let raw = source.read_u64()?;
413        if raw >= Felt::ORDER_U64 {
414            Err(DeserializationError::InvalidValue(
415                "int value is greater than field modulus".into(),
416            ))
417        } else {
418            Ok(super::lexer::shrink_u64_hex(raw))
419        }
420    }
421}
422
#[cfg(feature = "arbitrary")]
impl proptest::arbitrary::Arbitrary for IntValue {
    type Parameters = ();
    type Strategy = proptest::prelude::BoxedStrategy<Self>;

    /// Generates values whose variant is the narrowest one able to hold the number, so that the
    /// variant is preserved during serialization.
    fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
        use proptest::{num, prop_oneof, strategy::Strategy};

        // U8 values - full range
        let tiny = num::u8::ANY.prop_map(IntValue::U8);
        // U16 values that don't overlap with U8 to preserve variant during serialization
        let small = (u8::MAX as u16 + 1..=u16::MAX).prop_map(IntValue::U16);
        // U32 values that don't overlap with U8/U16 to preserve variant during serialization
        let medium = (u16::MAX as u32 + 1..=u32::MAX).prop_map(IntValue::U32);
        // Felt values - values that don't fit in u32 but are within field modulus
        let felt = (num::u64::ANY).prop_filter_map("valid felt value", |n| {
            if n > u32::MAX as u64 && n < crate::FIELD_MODULUS {
                Some(IntValue::Felt(Felt::new_unchecked(n)))
            } else {
                None
            }
        });

        // Pure random values, no meaningful shrinking pattern
        prop_oneof![tiny, small, medium, felt].no_shrink().boxed()
    }
}
452
453// BINARY ENCODED VALUE
454// ================================================================================================
455
/// Represents one of the various types of values that have a binary-encoded representation in
/// Miden Assembly source files.
///
/// Unlike [`IntValue`], there is no field-element variant: the widest value is a `u32`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinEncodedValue {
    /// A tiny value
    U8(u8),
    /// A small value
    U16(u16),
    /// A u32 constant, typically represents a memory address
    U32(u32),
}
467
468// TOKEN
469// ================================================================================================
470
/// The token type produced by [crate::parser::Lexer], and consumed by the parser.
///
/// The `'input` lifetime is that of the string slices borrowed by the identifier and string
/// variants.
#[derive(Debug, Clone)]
pub enum Token<'input> {
    // Tokens with a fixed spelling (keywords, instruction names, type names, ...). The `Display`
    // implementation prints each of them as that spelling.
    Add,
    Addrspace,
    Adv,
    AdvMap,
    InsertHdword,
    InsertHdwordWithDomain,
    InsertHqword,
    InsertHperm,
    InsertMem,
    AdvLoadw,
    AdvPipe,
    AdvPush,
    AdvPushw,
    AdvStack,
    PushMapval,
    PushMapvalCount,
    PushMapvaln,
    PushMtnode,
    And,
    Assert,
    Assertz,
    AssertEq,
    AssertEqw,
    EvalCircuit,
    Begin,
    Byte,
    Caller,
    Call,
    Cdrop,
    Cdropw,
    Clk,
    Const,
    CryptoStream,
    Cswap,
    Cswapw,
    Debug,
    Div,
    Drop,
    Dropw,
    Dup,
    Dupw,
    Dynexec,
    Dyncall,
    Else,
    Emit,
    End,
    Enum,
    Eq,
    Eqw,
    Ext2Add,
    Ext2Div,
    Ext2Inv,
    Ext2Mul,
    Ext2Neg,
    Ext2Sub,
    Err,
    Exec,
    Export,
    Exp,
    ExpU,
    False,
    Felt,
    FriExt2Fold4,
    Gt,
    Gte,
    Hash,
    HasMapkey,
    HornerBase,
    HornerExt,
    LogPrecompile,
    Hperm,
    Hmerge,
    I1,
    I8,
    I16,
    I32,
    I64,
    I128,
    If,
    ILog2,
    Inv,
    IsOdd,
    Local,
    Locaddr,
    LocLoad,
    LocLoadw,
    LocLoadwBe,
    LocLoadwLe,
    LocStore,
    LocStorew,
    LocStorewBe,
    LocStorewLe,
    Lt,
    Lte,
    Mem,
    MemLoad,
    MemLoadw,
    MemLoadwBe,
    MemLoadwLe,
    MemStore,
    MemStorew,
    MemStorewBe,
    MemStorewLe,
    MemStream,
    Movdn,
    Movdnw,
    Movup,
    Movupw,
    MtreeGet,
    MtreeMerge,
    MtreeSet,
    MtreeVerify,
    Mul,
    Neg,
    Neq,
    Not,
    Nop,
    Or,
    Padw,
    Pow2,
    Proc,
    Procref,
    Ptr,
    Pub,
    Push,
    Repeat,
    Reversew,
    Reversedw,
    Range,
    Sdepth,
    Stack,
    Struct,
    Sub,
    Swap,
    Swapw,
    Swapdw,
    Syscall,
    Trace,
    True,
    Type,
    Use,
    U8,
    U16,
    U32,
    U32And,
    U32Assert,
    U32Assert2,
    U32Assertw,
    U32Cast,
    U32Div,
    U32Divmod,
    U32Gt,
    U32Gte,
    U32Lt,
    U32Lte,
    U32Max,
    U32Min,
    U32Mod,
    U32Not,
    U32Or,
    U32OverflowingAdd,
    U32OverflowingAdd3,
    U32WideningAdd,
    U32WideningAdd3,
    U32WideningMadd,
    U32WideningMul,
    U32OverflowingSub,
    U32Popcnt,
    U32Clz,
    U32Ctz,
    U32Clo,
    U32Cto,
    U32Rotl,
    U32Rotr,
    U32Shl,
    U32Shr,
    U32Split,
    U32Test,
    U32Testw,
    U32WrappingAdd,
    U32WrappingAdd3,
    U32WrappingMadd,
    U32WrappingMul,
    U32WrappingSub,
    U32Xor,
    U64,
    U128,
    While,
    Word,
    Event,
    Xor,
    // Punctuation and operators.
    At,
    Bang,
    Colon,
    ColonColon,
    Dot,
    Comma,
    Equal,
    Langle,
    Lparen,
    Lbrace,
    Lbracket,
    Minus,
    Plus,
    Semicolon,
    SlashSlash,
    Slash,
    Star,
    Rangle,
    Rparen,
    Rbrace,
    Rbracket,
    Rstab,
    // Tokens without a fixed spelling. The `Display` implementation prints a short description
    // of the token kind for these, never the payload.
    /// A documentation comment together with its scope.
    DocComment(DocumentationType),
    /// A hex-encoded value.
    HexValue(IntValue),
    /// A hex-encoded word.
    HexWord(WordValue),
    /// A bin-encoded value.
    BinValue(BinEncodedValue),
    /// An integer.
    Int(u64),
    /// An identifier, borrowed from the input.
    Ident(&'input str),
    /// A constant identifier, borrowed from the input.
    ConstantIdent(&'input str),
    /// A quoted identifier, borrowed from the input.
    QuotedIdent(&'input str),
    /// A quoted string, borrowed from the input.
    QuotedString(&'input str),
    /// A comment.
    Comment,
    /// The end of the file.
    Eof,
}
699
700impl fmt::Display for Token<'_> {
701    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
702        match self {
703            Token::Add => write!(f, "add"),
704            Token::Addrspace => write!(f, "addrspace"),
705            Token::Adv => write!(f, "adv"),
706            Token::AdvMap => write!(f, "adv_map"),
707            Token::AdvStack => write!(f, "adv_stack"),
708            Token::InsertHdword => write!(f, "insert_hdword"),
709            Token::InsertHdwordWithDomain => write!(f, "insert_hdword_d"),
710            Token::InsertHqword => write!(f, "insert_hqword"),
711            Token::InsertHperm => write!(f, "insert_hperm"),
712            Token::InsertMem => write!(f, "insert_mem"),
713            Token::AdvLoadw => write!(f, "adv_loadw"),
714            Token::AdvPipe => write!(f, "adv_pipe"),
715            Token::AdvPush => write!(f, "adv_push"),
716            Token::AdvPushw => write!(f, "adv_pushw"),
717            Token::PushMapval => write!(f, "push_mapval"),
718            Token::PushMapvalCount => write!(f, "push_mapval_count"),
719            Token::PushMapvaln => write!(f, "push_mapvaln"),
720            Token::PushMtnode => write!(f, "push_mtnode"),
721            Token::And => write!(f, "and"),
722            Token::Assert => write!(f, "assert"),
723            Token::Assertz => write!(f, "assertz"),
724            Token::AssertEq => write!(f, "assert_eq"),
725            Token::AssertEqw => write!(f, "assert_eqw"),
726            Token::EvalCircuit => write!(f, "eval_circuit"),
727            Token::Begin => write!(f, "begin"),
728            Token::Byte => write!(f, "byte"),
729            Token::Caller => write!(f, "caller"),
730            Token::Call => write!(f, "call"),
731            Token::Cdrop => write!(f, "cdrop"),
732            Token::Cdropw => write!(f, "cdropw"),
733            Token::Clk => write!(f, "clk"),
734            Token::Const => write!(f, "const"),
735            Token::CryptoStream => write!(f, "crypto_stream"),
736            Token::Cswap => write!(f, "cswap"),
737            Token::Cswapw => write!(f, "cswapw"),
738            Token::Debug => write!(f, "debug"),
739            Token::Div => write!(f, "div"),
740            Token::Drop => write!(f, "drop"),
741            Token::Dropw => write!(f, "dropw"),
742            Token::Dup => write!(f, "dup"),
743            Token::Dupw => write!(f, "dupw"),
744            Token::Dynexec => write!(f, "dynexec"),
745            Token::Dyncall => write!(f, "dyncall"),
746            Token::Else => write!(f, "else"),
747            Token::Emit => write!(f, "emit"),
748            Token::End => write!(f, "end"),
749            Token::Enum => write!(f, "enum"),
750            Token::Eq => write!(f, "eq"),
751            Token::Eqw => write!(f, "eqw"),
752            Token::Ext2Add => write!(f, "ext2add"),
753            Token::Ext2Div => write!(f, "ext2div"),
754            Token::Ext2Inv => write!(f, "ext2inv"),
755            Token::Ext2Mul => write!(f, "ext2mul"),
756            Token::Ext2Neg => write!(f, "ext2neg"),
757            Token::Ext2Sub => write!(f, "ext2sub"),
758            Token::Err => write!(f, "err"),
759            Token::Exec => write!(f, "exec"),
760            Token::Exp => write!(f, "exp"),
761            Token::ExpU => write!(f, "exp.u"),
762            Token::Export => write!(f, "export"),
763            Token::False => write!(f, "false"),
764            Token::Felt => write!(f, "felt"),
765            Token::FriExt2Fold4 => write!(f, "fri_ext2fold4"),
766            Token::Gt => write!(f, "gt"),
767            Token::Gte => write!(f, "gte"),
768            Token::Hash => write!(f, "hash"),
769            Token::HasMapkey => write!(f, "has_mapkey"),
770            Token::Hperm => write!(f, "hperm"),
771            Token::Hmerge => write!(f, "hmerge"),
772            Token::I1 => write!(f, "i1"),
773            Token::I8 => write!(f, "i8"),
774            Token::I16 => write!(f, "i16"),
775            Token::I32 => write!(f, "i32"),
776            Token::I64 => write!(f, "i64"),
777            Token::I128 => write!(f, "i128"),
778            Token::If => write!(f, "if"),
779            Token::ILog2 => write!(f, "ilog2"),
780            Token::Inv => write!(f, "inv"),
781            Token::IsOdd => write!(f, "is_odd"),
782            Token::Local => write!(f, "local"),
783            Token::Locaddr => write!(f, "locaddr"),
784            Token::LocLoad => write!(f, "loc_load"),
785            Token::LocLoadw => write!(f, "loc_loadw"),
786            Token::LocLoadwBe => write!(f, "loc_loadw_be"),
787            Token::LocLoadwLe => write!(f, "loc_loadw_le"),
788            Token::LocStore => write!(f, "loc_store"),
789            Token::LocStorew => write!(f, "loc_storew"),
790            Token::LocStorewBe => write!(f, "loc_storew_be"),
791            Token::LocStorewLe => write!(f, "loc_storew_le"),
792            Token::Lt => write!(f, "lt"),
793            Token::Lte => write!(f, "lte"),
794            Token::Mem => write!(f, "mem"),
795            Token::MemLoad => write!(f, "mem_load"),
796            Token::MemLoadw => write!(f, "mem_loadw"),
797            Token::MemLoadwBe => write!(f, "mem_loadw_be"),
798            Token::MemLoadwLe => write!(f, "mem_loadw_le"),
799            Token::MemStore => write!(f, "mem_store"),
800            Token::MemStorew => write!(f, "mem_storew"),
801            Token::MemStorewBe => write!(f, "mem_storew_be"),
802            Token::MemStorewLe => write!(f, "mem_storew_le"),
803            Token::MemStream => write!(f, "mem_stream"),
804            Token::Movdn => write!(f, "movdn"),
805            Token::Movdnw => write!(f, "movdnw"),
806            Token::Movup => write!(f, "movup"),
807            Token::Movupw => write!(f, "movupw"),
808            Token::MtreeGet => write!(f, "mtree_get"),
809            Token::MtreeMerge => write!(f, "mtree_merge"),
810            Token::MtreeSet => write!(f, "mtree_set"),
811            Token::MtreeVerify => write!(f, "mtree_verify"),
812            Token::Mul => write!(f, "mul"),
813            Token::Neg => write!(f, "neg"),
814            Token::Neq => write!(f, "neq"),
815            Token::Not => write!(f, "not"),
816            Token::Nop => write!(f, "nop"),
817            Token::Or => write!(f, "or"),
818            Token::Padw => write!(f, "padw"),
819            Token::Pow2 => write!(f, "pow2"),
820            Token::Proc => write!(f, "proc"),
821            Token::Procref => write!(f, "procref"),
822            Token::Ptr => write!(f, "ptr"),
823            Token::Pub => write!(f, "pub"),
824            Token::Push => write!(f, "push"),
825            Token::HornerBase => write!(f, "horner_eval_base"),
826            Token::HornerExt => write!(f, "horner_eval_ext"),
827            Token::LogPrecompile => write!(f, "log_precompile"),
828            Token::Repeat => write!(f, "repeat"),
829            Token::Reversew => write!(f, "reversew"),
830            Token::Reversedw => write!(f, "reversedw"),
831            Token::Sdepth => write!(f, "sdepth"),
832            Token::Stack => write!(f, "stack"),
833            Token::Struct => write!(f, "struct"),
834            Token::Sub => write!(f, "sub"),
835            Token::Swap => write!(f, "swap"),
836            Token::Swapw => write!(f, "swapw"),
837            Token::Swapdw => write!(f, "swapdw"),
838            Token::Syscall => write!(f, "syscall"),
839            Token::Trace => write!(f, "trace"),
840            Token::True => write!(f, "true"),
841            Token::Type => write!(f, "type"),
842            Token::Use => write!(f, "use"),
843            Token::U8 => write!(f, "u8"),
844            Token::U16 => write!(f, "u16"),
845            Token::U32 => write!(f, "u32"),
846            Token::U32And => write!(f, "u32and"),
847            Token::U32Assert => write!(f, "u32assert"),
848            Token::U32Assert2 => write!(f, "u32assert2"),
849            Token::U32Assertw => write!(f, "u32assertw"),
850            Token::U32Cast => write!(f, "u32cast"),
851            Token::U32Div => write!(f, "u32div"),
852            Token::U32Divmod => write!(f, "u32divmod"),
853            Token::U32Gt => write!(f, "u32gt"),
854            Token::U32Gte => write!(f, "u32gte"),
855            Token::U32Lt => write!(f, "u32lt"),
856            Token::U32Lte => write!(f, "u32lte"),
857            Token::U32Max => write!(f, "u32max"),
858            Token::U32Min => write!(f, "u32min"),
859            Token::U32Mod => write!(f, "u32mod"),
860            Token::U32Not => write!(f, "u32not"),
861            Token::U32Or => write!(f, "u32or"),
862            Token::U32OverflowingAdd => write!(f, "u32overflowing_add"),
863            Token::U32OverflowingAdd3 => write!(f, "u32overflowing_add3"),
864            Token::U32WideningAdd => write!(f, "u32widening_add"),
865            Token::U32WideningAdd3 => write!(f, "u32widening_add3"),
866            Token::U32WideningMadd => write!(f, "u32widening_madd"),
867            Token::U32WideningMul => write!(f, "u32widening_mul"),
868            Token::U32OverflowingSub => write!(f, "u32overflowing_sub"),
869            Token::U32Popcnt => write!(f, "u32popcnt"),
870            Token::U32Clz => write!(f, "u32clz"),
871            Token::U32Ctz => write!(f, "u32ctz"),
872            Token::U32Clo => write!(f, "u32clo"),
873            Token::U32Cto => write!(f, "u32cto"),
874            Token::U32Rotl => write!(f, "u32rotl"),
875            Token::U32Rotr => write!(f, "u32rotr"),
876            Token::U32Shl => write!(f, "u32shl"),
877            Token::U32Shr => write!(f, "u32shr"),
878            Token::U32Split => write!(f, "u32split"),
879            Token::U32Test => write!(f, "u32test"),
880            Token::U32Testw => write!(f, "u32testw"),
881            Token::U32WrappingAdd => write!(f, "u32wrapping_add"),
882            Token::U32WrappingAdd3 => write!(f, "u32wrapping_add3"),
883            Token::U32WrappingMadd => write!(f, "u32wrapping_madd"),
884            Token::U32WrappingMul => write!(f, "u32wrapping_mul"),
885            Token::U32WrappingSub => write!(f, "u32wrapping_sub"),
886            Token::U32Xor => write!(f, "u32xor"),
887            Token::U64 => write!(f, "u64"),
888            Token::U128 => write!(f, "u128"),
889            Token::While => write!(f, "while"),
890            Token::Word => write!(f, "word"),
891            Token::Event => write!(f, "event"),
892            Token::Xor => write!(f, "xor"),
893            Token::At => write!(f, "@"),
894            Token::Bang => write!(f, "!"),
895            Token::Colon => write!(f, ":"),
896            Token::ColonColon => write!(f, "::"),
897            Token::Dot => write!(f, "."),
898            Token::Comma => write!(f, ","),
899            Token::Equal => write!(f, "="),
900            Token::Langle => write!(f, "<"),
901            Token::Lparen => write!(f, "("),
902            Token::Lbrace => write!(f, "{{"),
903            Token::Lbracket => write!(f, "["),
904            Token::Minus => write!(f, "-"),
905            Token::Plus => write!(f, "+"),
906            Token::Semicolon => write!(f, ";"),
907            Token::SlashSlash => write!(f, "//"),
908            Token::Slash => write!(f, "/"),
909            Token::Star => write!(f, "*"),
910            Token::Rangle => write!(f, ">"),
911            Token::Rparen => write!(f, ")"),
912            Token::Rbrace => write!(f, "}}"),
913            Token::Rbracket => write!(f, "]"),
914            Token::Rstab => write!(f, "->"),
915            Token::Range => write!(f, ".."),
916            Token::DocComment(DocumentationType::Module(_)) => f.write_str("module doc"),
917            Token::DocComment(DocumentationType::Form(_)) => f.write_str("doc comment"),
918            Token::HexValue(_) => f.write_str("hex-encoded value"),
919            Token::HexWord(_) => f.write_str("hex-encoded word"),
920            Token::BinValue(_) => f.write_str("bin-encoded value"),
921            Token::Int(_) => f.write_str("integer"),
922            Token::Ident(_) => f.write_str("identifier"),
923            Token::ConstantIdent(_) => f.write_str("constant identifier"),
924            Token::QuotedIdent(_) => f.write_str("quoted identifier"),
925            Token::QuotedString(_) => f.write_str("quoted string"),
926            Token::Comment => f.write_str("comment"),
927            Token::Eof => write!(f, "end of file"),
928        }
929    }
930}
931
932impl<'input> Token<'input> {
933    /// Returns true if this token represents the name of an instruction.
934    ///
935    /// This is used to simplify diagnostic output related to expected tokens so as not to
936    /// overwhelm the user with a ton of possible expected instruction variants.
937    pub fn is_instruction(&self) -> bool {
938        matches!(
939            self,
940            Token::Add
941                | Token::Adv
942                | Token::InsertHdword
943                | Token::InsertHdwordWithDomain
944                | Token::InsertHqword
945                | Token::InsertHperm
946                | Token::InsertMem
947                | Token::AdvLoadw
948                | Token::AdvPipe
949                | Token::AdvPush
950                | Token::AdvPushw
951                | Token::AdvStack
952                | Token::PushMapval
953                | Token::PushMapvalCount
954                | Token::PushMapvaln
955                | Token::PushMtnode
956                | Token::And
957                | Token::Assert
958                | Token::Assertz
959                | Token::AssertEq
960                | Token::AssertEqw
961                | Token::EvalCircuit
962                | Token::Caller
963                | Token::Call
964                | Token::Cdrop
965                | Token::Cdropw
966                | Token::Clk
967                | Token::CryptoStream
968                | Token::Cswap
969                | Token::Cswapw
970                | Token::Debug
971                | Token::Div
972                | Token::Drop
973                | Token::Dropw
974                | Token::Dup
975                | Token::Dupw
976                | Token::Dynexec
977                | Token::Dyncall
978                | Token::Emit
979                | Token::Eq
980                | Token::Eqw
981                | Token::Ext2Add
982                | Token::Ext2Div
983                | Token::Ext2Inv
984                | Token::Ext2Mul
985                | Token::Ext2Neg
986                | Token::Ext2Sub
987                | Token::Exec
988                | Token::Exp
989                | Token::ExpU
990                | Token::FriExt2Fold4
991                | Token::Gt
992                | Token::Gte
993                | Token::Hash
994                | Token::Hperm
995                | Token::Hmerge
996                | Token::HornerBase
997                | Token::HornerExt
998                | Token::LogPrecompile
999                | Token::ILog2
1000                | Token::Inv
1001                | Token::IsOdd
1002                | Token::Local
1003                | Token::Locaddr
1004                | Token::LocLoad
1005                | Token::LocLoadw
1006                | Token::LocLoadwBe
1007                | Token::LocLoadwLe
1008                | Token::LocStore
1009                | Token::LocStorew
1010                | Token::LocStorewBe
1011                | Token::LocStorewLe
1012                | Token::Lt
1013                | Token::Lte
1014                | Token::Mem
1015                | Token::MemLoad
1016                | Token::MemLoadw
1017                | Token::MemLoadwBe
1018                | Token::MemLoadwLe
1019                | Token::MemStore
1020                | Token::MemStorew
1021                | Token::MemStorewBe
1022                | Token::MemStorewLe
1023                | Token::MemStream
1024                | Token::Movdn
1025                | Token::Movdnw
1026                | Token::Movup
1027                | Token::Movupw
1028                | Token::MtreeGet
1029                | Token::MtreeMerge
1030                | Token::MtreeSet
1031                | Token::MtreeVerify
1032                | Token::Mul
1033                | Token::Neg
1034                | Token::Neq
1035                | Token::Not
1036                | Token::Nop
1037                | Token::Or
1038                | Token::Padw
1039                | Token::Pow2
1040                | Token::Procref
1041                | Token::Push
1042                | Token::Repeat
1043                | Token::Reversew
1044                | Token::Reversedw
1045                | Token::Sdepth
1046                | Token::Stack
1047                | Token::Sub
1048                | Token::Swap
1049                | Token::Swapw
1050                | Token::Swapdw
1051                | Token::Syscall
1052                | Token::Trace
1053                | Token::U32And
1054                | Token::U32Assert
1055                | Token::U32Assert2
1056                | Token::U32Assertw
1057                | Token::U32Cast
1058                | Token::U32Div
1059                | Token::U32Divmod
1060                | Token::U32Gt
1061                | Token::U32Gte
1062                | Token::U32Lt
1063                | Token::U32Lte
1064                | Token::U32Max
1065                | Token::U32Min
1066                | Token::U32Mod
1067                | Token::U32Not
1068                | Token::U32Or
1069                | Token::U32OverflowingAdd
1070                | Token::U32OverflowingAdd3
1071                | Token::U32WideningAdd
1072                | Token::U32WideningAdd3
1073                | Token::U32WideningMadd
1074                | Token::U32WideningMul
1075                | Token::U32OverflowingSub
1076                | Token::U32Popcnt
1077                | Token::U32Clz
1078                | Token::U32Ctz
1079                | Token::U32Clo
1080                | Token::U32Cto
1081                | Token::U32Rotl
1082                | Token::U32Rotr
1083                | Token::U32Shl
1084                | Token::U32Shr
1085                | Token::U32Split
1086                | Token::U32Test
1087                | Token::U32Testw
1088                | Token::U32WrappingAdd
1089                | Token::U32WrappingAdd3
1090                | Token::U32WrappingMadd
1091                | Token::U32WrappingMul
1092                | Token::U32WrappingSub
1093                | Token::U32Xor
1094                | Token::Xor
1095        )
1096    }
1097
1098    /// Returns true if this token represents the name of an type or a type-related keyword.
1099    ///
1100    /// This is used to simplify diagnostic output related to expected tokens so as not to
1101    /// overwhelm the user with a ton of possible expected tokens.
1102    pub fn is_type_keyword(&self) -> bool {
1103        matches!(
1104            self,
1105            Token::Addrspace
1106                | Token::Ptr
1107                | Token::I1
1108                | Token::I8
1109                | Token::I16
1110                | Token::I32
1111                | Token::I64
1112                | Token::I128
1113                | Token::U8
1114                | Token::U16
1115                | Token::U32
1116                | Token::U64
1117                | Token::U128
1118                | Token::Felt
1119                | Token::Word
1120                | Token::Struct
1121        )
1122    }
1123
1124    const KEYWORDS: &'static [(&'static str, Token<'static>)] = &[
1125        ("add", Token::Add),
1126        ("addrspace", Token::Addrspace),
1127        ("adv", Token::Adv),
1128        ("adv_map", Token::AdvMap),
1129        ("eval_circuit", Token::EvalCircuit),
1130        ("insert_hdword", Token::InsertHdword),
1131        ("insert_hdword_d", Token::InsertHdwordWithDomain),
1132        ("insert_hqword", Token::InsertHqword),
1133        ("insert_hperm", Token::InsertHperm),
1134        ("insert_mem", Token::InsertMem),
1135        ("adv_loadw", Token::AdvLoadw),
1136        ("adv_pipe", Token::AdvPipe),
1137        ("adv_push", Token::AdvPush),
1138        ("adv_pushw", Token::AdvPushw),
1139        ("adv_stack", Token::AdvStack),
1140        ("push_mapval", Token::PushMapval),
1141        ("push_mapval_count", Token::PushMapvalCount),
1142        ("push_mapvaln", Token::PushMapvaln),
1143        ("push_mtnode", Token::PushMtnode),
1144        ("and", Token::And),
1145        ("assert", Token::Assert),
1146        ("assertz", Token::Assertz),
1147        ("assert_eq", Token::AssertEq),
1148        ("assert_eqw", Token::AssertEqw),
1149        ("begin", Token::Begin),
1150        ("byte", Token::Byte),
1151        ("caller", Token::Caller),
1152        ("call", Token::Call),
1153        ("cdrop", Token::Cdrop),
1154        ("cdropw", Token::Cdropw),
1155        ("clk", Token::Clk),
1156        ("const", Token::Const),
1157        ("crypto_stream", Token::CryptoStream),
1158        ("cswap", Token::Cswap),
1159        ("cswapw", Token::Cswapw),
1160        ("debug", Token::Debug),
1161        ("div", Token::Div),
1162        ("drop", Token::Drop),
1163        ("dropw", Token::Dropw),
1164        ("dup", Token::Dup),
1165        ("dupw", Token::Dupw),
1166        ("dynexec", Token::Dynexec),
1167        ("dyncall", Token::Dyncall),
1168        ("else", Token::Else),
1169        ("emit", Token::Emit),
1170        ("end", Token::End),
1171        ("enum", Token::Enum),
1172        ("eq", Token::Eq),
1173        ("eqw", Token::Eqw),
1174        ("ext2add", Token::Ext2Add),
1175        ("ext2div", Token::Ext2Div),
1176        ("ext2inv", Token::Ext2Inv),
1177        ("ext2mul", Token::Ext2Mul),
1178        ("ext2neg", Token::Ext2Neg),
1179        ("ext2sub", Token::Ext2Sub),
1180        ("err", Token::Err),
1181        ("exec", Token::Exec),
1182        ("exp", Token::Exp),
1183        ("exp.u", Token::ExpU),
1184        ("export", Token::Export),
1185        ("false", Token::False),
1186        ("felt", Token::Felt),
1187        ("fri_ext2fold4", Token::FriExt2Fold4),
1188        ("gt", Token::Gt),
1189        ("gte", Token::Gte),
1190        ("hash", Token::Hash),
1191        ("has_mapkey", Token::HasMapkey),
1192        ("hperm", Token::Hperm),
1193        ("hmerge", Token::Hmerge),
1194        ("i1", Token::I1),
1195        ("i8", Token::I8),
1196        ("i16", Token::I16),
1197        ("i32", Token::I32),
1198        ("i64", Token::I64),
1199        ("i128", Token::I128),
1200        ("if", Token::If),
1201        ("ilog2", Token::ILog2),
1202        ("inv", Token::Inv),
1203        ("is_odd", Token::IsOdd),
1204        ("local", Token::Local),
1205        ("locaddr", Token::Locaddr),
1206        ("loc_load", Token::LocLoad),
1207        ("loc_loadw", Token::LocLoadw),
1208        ("loc_loadw_be", Token::LocLoadwBe),
1209        ("loc_loadw_le", Token::LocLoadwLe),
1210        ("loc_store", Token::LocStore),
1211        ("loc_storew", Token::LocStorew),
1212        ("loc_storew_be", Token::LocStorewBe),
1213        ("loc_storew_le", Token::LocStorewLe),
1214        ("lt", Token::Lt),
1215        ("lte", Token::Lte),
1216        ("mem", Token::Mem),
1217        ("mem_load", Token::MemLoad),
1218        ("mem_loadw", Token::MemLoadw),
1219        ("mem_loadw_be", Token::MemLoadwBe),
1220        ("mem_loadw_le", Token::MemLoadwLe),
1221        ("mem_store", Token::MemStore),
1222        ("mem_storew", Token::MemStorew),
1223        ("mem_storew_be", Token::MemStorewBe),
1224        ("mem_storew_le", Token::MemStorewLe),
1225        ("mem_stream", Token::MemStream),
1226        ("movdn", Token::Movdn),
1227        ("movdnw", Token::Movdnw),
1228        ("movup", Token::Movup),
1229        ("movupw", Token::Movupw),
1230        ("mtree_get", Token::MtreeGet),
1231        ("mtree_merge", Token::MtreeMerge),
1232        ("mtree_set", Token::MtreeSet),
1233        ("mtree_verify", Token::MtreeVerify),
1234        ("mul", Token::Mul),
1235        ("neg", Token::Neg),
1236        ("neq", Token::Neq),
1237        ("not", Token::Not),
1238        ("nop", Token::Nop),
1239        ("or", Token::Or),
1240        ("padw", Token::Padw),
1241        ("pow2", Token::Pow2),
1242        ("proc", Token::Proc),
1243        ("procref", Token::Procref),
1244        ("ptr", Token::Ptr),
1245        ("push", Token::Push),
1246        ("pub", Token::Pub),
1247        ("horner_eval_base", Token::HornerBase),
1248        ("horner_eval_ext", Token::HornerExt),
1249        ("log_precompile", Token::LogPrecompile),
1250        ("repeat", Token::Repeat),
1251        ("reversew", Token::Reversew),
1252        ("reversedw", Token::Reversedw),
1253        ("sdepth", Token::Sdepth),
1254        ("stack", Token::Stack),
1255        ("struct", Token::Struct),
1256        ("sub", Token::Sub),
1257        ("swap", Token::Swap),
1258        ("swapw", Token::Swapw),
1259        ("swapdw", Token::Swapdw),
1260        ("syscall", Token::Syscall),
1261        ("trace", Token::Trace),
1262        ("true", Token::True),
1263        ("type", Token::Type),
1264        ("use", Token::Use),
1265        ("u8", Token::U8),
1266        ("u16", Token::U16),
1267        ("u32", Token::U32),
1268        ("u32and", Token::U32And),
1269        ("u32assert", Token::U32Assert),
1270        ("u32assert2", Token::U32Assert2),
1271        ("u32assertw", Token::U32Assertw),
1272        ("u32cast", Token::U32Cast),
1273        ("u32div", Token::U32Div),
1274        ("u32divmod", Token::U32Divmod),
1275        ("u32gt", Token::U32Gt),
1276        ("u32gte", Token::U32Gte),
1277        ("u32lt", Token::U32Lt),
1278        ("u32lte", Token::U32Lte),
1279        ("u32max", Token::U32Max),
1280        ("u32min", Token::U32Min),
1281        ("u32mod", Token::U32Mod),
1282        ("u32not", Token::U32Not),
1283        ("u32or", Token::U32Or),
1284        ("u32overflowing_add", Token::U32OverflowingAdd),
1285        ("u32overflowing_add3", Token::U32OverflowingAdd3),
1286        ("u32widening_add", Token::U32WideningAdd),
1287        ("u32widening_add3", Token::U32WideningAdd3),
1288        ("u32widening_madd", Token::U32WideningMadd),
1289        ("u32widening_mul", Token::U32WideningMul),
1290        ("u32overflowing_sub", Token::U32OverflowingSub),
1291        ("u32popcnt", Token::U32Popcnt),
1292        ("u32clz", Token::U32Clz),
1293        ("u32ctz", Token::U32Ctz),
1294        ("u32clo", Token::U32Clo),
1295        ("u32cto", Token::U32Cto),
1296        ("u32rotl", Token::U32Rotl),
1297        ("u32rotr", Token::U32Rotr),
1298        ("u32shl", Token::U32Shl),
1299        ("u32shr", Token::U32Shr),
1300        ("u32split", Token::U32Split),
1301        ("u32test", Token::U32Test),
1302        ("u32testw", Token::U32Testw),
1303        ("u32wrapping_add", Token::U32WrappingAdd),
1304        ("u32wrapping_add3", Token::U32WrappingAdd3),
1305        ("u32wrapping_madd", Token::U32WrappingMadd),
1306        ("u32wrapping_mul", Token::U32WrappingMul),
1307        ("u32wrapping_sub", Token::U32WrappingSub),
1308        ("u32xor", Token::U32Xor),
1309        ("u64", Token::U64),
1310        ("u128", Token::U128),
1311        ("while", Token::While),
1312        ("word", Token::Word),
1313        ("event", Token::Event),
1314        ("xor", Token::Xor),
1315    ];
1316
1317    /// Constructs a DFA capable of recognizing Miden Assembly keywords.
1318    ///
1319    /// Constructing the state machine is expensive, so it should not be done in hot code. Instead,
1320    /// prefer to construct it once and reuse it many times.
1321    ///
1322    /// Currently we construct an instance of this searcher in the lexer, which is then used to
1323    /// select a keyword token or construct an identifier token depending on whether a given string
1324    /// is a known keyword.
1325    pub fn keyword_searcher() -> aho_corasick::AhoCorasick {
1326        use aho_corasick::AhoCorasick;
1327
1328        // Execute a search for any of the keywords above, matching longest first, and requiring
1329        // the match to cover the entire input.
1330        AhoCorasick::builder()
1331            .match_kind(aho_corasick::MatchKind::LeftmostLongest)
1332            .start_kind(aho_corasick::StartKind::Anchored)
1333            .build(Self::KEYWORDS.iter().map(|(kw, _)| kw).copied())
1334            .expect("unable to build aho-corasick searcher for token")
1335    }
1336
1337    /// Returns an appropriate [Token] depending on whether the given string is a keyword or an
1338    /// identifier.
1339    ///
1340    /// NOTE: This constructs and throws away an expensive-to-construct Aho-Corasick state machine.
1341    /// You should not call this from any code on a hot path. Instead, construct the state machine
1342    /// once using [Token::keyword_searcher], and reuse it for all searches using
1343    /// [Token::from_keyword_or_ident_with_searcher].
1344    ///
1345    /// Currently, this function is only called along one code path, which is when we are
1346    /// constructing a parser error in which we wish to determine which, if any, of the expected
1347    /// tokens are instruction opcode keywords, so we can collapse them into a more user-friendly
1348    /// error message. This is not on a hot path, so we don't care if it is a bit slow.
1349    pub fn from_keyword_or_ident(s: &'input str) -> Self {
1350        let searcher = Self::keyword_searcher();
1351        Self::from_keyword_or_ident_with_searcher(s, &searcher)
1352    }
1353
1354    /// This is the primary function you should use when you wish to get an appropriate token for
1355    /// a given input string, depending on whether it is a keyword or an identifier.
1356    ///
1357    /// See [Token::keyword_searcher] for additional information on how this is meant to be used.
1358    pub fn from_keyword_or_ident_with_searcher(
1359        s: &'input str,
1360        searcher: &aho_corasick::AhoCorasick,
1361    ) -> Self {
1362        let input = aho_corasick::Input::new(s).anchored(aho_corasick::Anchored::Yes);
1363        match searcher.find(input) {
1364            // No match, it's an ident
1365            None => Token::Ident(s),
1366            // If the match is not exact, it's an ident
1367            Some(matched) if matched.len() != s.len() => Token::Ident(s),
1368            // Otherwise clone the Token corresponding to the keyword that was matched
1369            Some(matched) => Self::KEYWORDS[matched.pattern().as_usize()].1.clone(),
1370        }
1371    }
1372
1373    /// Parses a [Token] from a string corresponding to that token.
1374    ///
1375    /// This solely exists to aid in constructing more user-friendly error messages in certain
1376    /// scenarios, and is otherwise not used (nor should it be). It is quite expensive to call due
1377    /// to invoking [Token::keyword_searcher] under the covers. See the documentation for that
1378    /// function for more details.
1379    pub fn parse(s: &'input str) -> Option<Token<'input>> {
1380        match Token::from_keyword_or_ident(s) {
1381            Token::Ident(_) => {
1382                // Nope, try again
1383                match s {
1384                    "@" => Some(Token::At),
1385                    "!" => Some(Token::Bang),
1386                    ":" => Some(Token::Colon),
1387                    "::" => Some(Token::ColonColon),
1388                    "." => Some(Token::Dot),
1389                    "," => Some(Token::Comma),
1390                    "=" => Some(Token::Equal),
1391                    "<" => Some(Token::Langle),
1392                    "(" => Some(Token::Lparen),
1393                    "{" => Some(Token::Lbrace),
1394                    "[" => Some(Token::Lbracket),
1395                    "-" => Some(Token::Minus),
1396                    "+" => Some(Token::Plus),
1397                    ";" => Some(Token::Semicolon),
1398                    "//" => Some(Token::SlashSlash),
1399                    "/" => Some(Token::Slash),
1400                    "*" => Some(Token::Star),
1401                    ">" => Some(Token::Rangle),
1402                    ")" => Some(Token::Rparen),
1403                    "}" => Some(Token::Rbrace),
1404                    "]" => Some(Token::Rbracket),
1405                    "->" => Some(Token::Rstab),
1406                    ".." => Some(Token::Range),
1407                    "end of file" => Some(Token::Eof),
1408                    "module doc" => {
1409                        Some(Token::DocComment(DocumentationType::Module(String::new())))
1410                    },
1411                    "doc comment" => {
1412                        Some(Token::DocComment(DocumentationType::Form(String::new())))
1413                    },
1414                    "comment" => Some(Token::Comment),
1415                    "hex-encoded value" => Some(Token::HexValue(IntValue::U8(0))),
1416                    "hex-encoded word" => Some(Token::HexWord(WordValue([Felt::ZERO; 4]))),
1417                    "bin-encoded value" => Some(Token::BinValue(BinEncodedValue::U8(0))),
1418                    "integer" => Some(Token::Int(0)),
1419                    "identifier" => Some(Token::Ident("")),
1420                    "constant identifier" => Some(Token::ConstantIdent("")),
1421                    "quoted identifier" => Some(Token::QuotedIdent("")),
1422                    "quoted string" => Some(Token::QuotedString("")),
1423                    _ => None,
1424                }
1425            },
1426            // We matched a keyword
1427            token => Some(token),
1428        }
1429    }
1430}