triton_isa/
parser.rs

1use std::collections::HashMap;
2use std::collections::HashSet;
3use std::collections::hash_map::Entry;
4use std::error::Error;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::fmt::Result as FmtResult;
8
9use itertools::Itertools;
10use nom::Finish;
11use nom::IResult;
12use nom::Parser;
13use nom::branch::alt;
14use nom::bytes::complete::is_not;
15use nom::bytes::complete::tag;
16use nom::bytes::complete::take_until;
17use nom::bytes::complete::take_while;
18use nom::bytes::complete::take_while1;
19use nom::character::char;
20use nom::character::complete::digit1;
21use nom::combinator::cut;
22use nom::combinator::eof;
23use nom::combinator::opt;
24use nom::combinator::value;
25use nom::error::context;
26use nom::multi::many0;
27use nom::multi::many1;
28use nom::multi::separated_list1;
29use nom::sequence::delimited;
30use nom::sequence::terminated;
31use nom_language::error::VerboseError;
32use nom_language::error::VerboseErrorKind;
33use twenty_first::bfe;
34use twenty_first::prelude::BFieldElement;
35
36use crate::instruction::ALL_INSTRUCTION_NAMES;
37use crate::instruction::AnInstruction;
38use crate::instruction::AssertionContext;
39use crate::instruction::Instruction;
40use crate::instruction::LabelledInstruction;
41use crate::instruction::TypeHint;
42use crate::op_stack::NumberOfWords;
43use crate::op_stack::OpStackElement;
44
45const KEYWORDS: [&str; 3] = [
46    "hint",
47    "error_id",
48    "error_message", // reserved for future use
49];
50
51#[derive(Debug, PartialEq)]
52pub struct ParseError<'a> {
53    pub input: &'a str,
54    pub errors: VerboseError<&'a str>,
55}
56
57/// An intermediate object for the parsing / compilation pipeline. You probably
58/// want [`LabelledInstruction`].
59#[derive(Debug, Clone, Eq, PartialEq, Hash)]
60pub enum InstructionToken<'a> {
61    Instruction(AnInstruction<String>, &'a str),
62    Label(String, &'a str),
63    Breakpoint(&'a str),
64    TypeHint(TypeHint, &'a str),
65    AssertionContext(AssertionContext, &'a str),
66}
67
68impl Display for ParseError<'_> {
69    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
70        let err_display = nom_language::error::convert_error(self.input, self.errors.clone());
71        write!(f, "{err_display}")
72    }
73}
74
75impl Error for ParseError<'_> {}
76
77impl<'a> InstructionToken<'a> {
78    pub fn token_str(&self) -> &'a str {
79        match self {
80            Self::Instruction(_, token_str) => token_str,
81            Self::Label(_, token_str) => token_str,
82            Self::Breakpoint(token_str) => token_str,
83            Self::TypeHint(_, token_str) => token_str,
84            Self::AssertionContext(_, token_str) => token_str,
85        }
86    }
87
88    pub fn to_labelled_instruction(&self) -> LabelledInstruction {
89        match self {
90            Self::Instruction(instr, _) => LabelledInstruction::Instruction(instr.to_owned()),
91            Self::Label(label, _) => LabelledInstruction::Label(label.to_owned()),
92            Self::Breakpoint(_) => LabelledInstruction::Breakpoint,
93            Self::TypeHint(type_hint, _) => LabelledInstruction::TypeHint(type_hint.to_owned()),
94            Self::AssertionContext(ctx, _) => LabelledInstruction::AssertionContext(ctx.to_owned()),
95        }
96    }
97}
98
99pub fn to_labelled_instructions(instructions: &[InstructionToken]) -> Vec<LabelledInstruction> {
100    instructions
101        .iter()
102        .map(|instruction| instruction.to_labelled_instruction())
103        .collect()
104}
105
106/// Parse a program
107pub(crate) fn parse(input: &str) -> Result<Vec<InstructionToken<'_>>, ParseError<'_>> {
108    let (_, instructions) = tokenize(input)
109        .finish()
110        .map_err(|errors| ParseError { input, errors })?;
111
112    ensure_no_missing_or_duplicate_labels(input, &instructions)?;
113    ensure_assertion_context_is_matched_with_assertion(input, &instructions)?;
114
115    Ok(instructions)
116}
117
118fn ensure_no_missing_or_duplicate_labels<'a>(
119    input: &'a str,
120    instructions: &[InstructionToken<'a>],
121) -> Result<(), ParseError<'a>> {
122    // identify all and duplicate labels
123    let mut seen_labels = HashMap::<_, InstructionToken>::default();
124    let mut duplicate_labels = HashSet::default();
125    for instruction in instructions {
126        if let InstructionToken::Label(label, _) = instruction {
127            if let Some(first_occurrence) = seen_labels.get(label.as_str()) {
128                duplicate_labels.insert(first_occurrence.to_owned());
129                duplicate_labels.insert(instruction.to_owned());
130            } else {
131                seen_labels.insert(label.as_str(), instruction.to_owned());
132            }
133        }
134    }
135
136    let missing_labels = identify_missing_labels(instructions, seen_labels);
137
138    if duplicate_labels.is_empty() && missing_labels.is_empty() {
139        return Ok(());
140    }
141
142    let errors = errors_for_duplicate_and_missing_labels(duplicate_labels, missing_labels);
143    Err(ParseError { input, errors })
144}
145
146fn identify_missing_labels<'a>(
147    instructions: &[InstructionToken<'a>],
148    seen_labels: HashMap<&str, InstructionToken>,
149) -> HashSet<InstructionToken<'a>> {
150    let mut missing_labels = HashSet::default();
151    for instruction in instructions {
152        if let InstructionToken::Instruction(AnInstruction::Call(label), _) = instruction {
153            if !seen_labels.contains_key(label.as_str()) {
154                missing_labels.insert(instruction.to_owned());
155            }
156        }
157    }
158    missing_labels
159}
160
161fn errors_for_duplicate_and_missing_labels<'a>(
162    duplicate_labels: HashSet<InstructionToken<'a>>,
163    missing_labels: HashSet<InstructionToken<'a>>,
164) -> VerboseError<&'a str> {
165    let duplicate_label_error_context = VerboseErrorKind::Context("duplicate label");
166    let missing_label_error_context = VerboseErrorKind::Context("missing label");
167
168    let duplicate_label_errors =
169        errors_for_labels_with_context(duplicate_labels, duplicate_label_error_context);
170    let missing_label_errors =
171        errors_for_labels_with_context(missing_labels, missing_label_error_context);
172
173    let errors = [duplicate_label_errors, missing_label_errors].concat();
174    VerboseError { errors }
175}
176
177fn errors_for_labels_with_context(
178    labels: HashSet<InstructionToken<'_>>,
179    context: VerboseErrorKind,
180) -> Vec<(&str, VerboseErrorKind)> {
181    labels
182        .into_iter()
183        .map(|label| (label.token_str(), context.clone()))
184        .collect()
185}
186
187fn ensure_assertion_context_is_matched_with_assertion<'a>(
188    input: &'a str,
189    instructions: &[InstructionToken<'a>],
190) -> Result<(), ParseError<'a>> {
191    type IT<'a> = InstructionToken<'a>;
192
193    let mut accept_id = false;
194    let mut incorrectly_placed_contexts = HashSet::new();
195
196    for instruction in instructions {
197        match instruction {
198            IT::Instruction(AnInstruction::Assert | AnInstruction::AssertVector, _) => {
199                accept_id = true;
200            }
201            IT::AssertionContext(AssertionContext::ID(_), _) => {
202                if !accept_id {
203                    incorrectly_placed_contexts.insert(instruction.clone());
204                }
205                accept_id = false;
206            }
207
208            // any assertion context must immediately follow an `assert` or
209            // `assert_vector`
210            _ => accept_id = false,
211        }
212    }
213
214    let parser_context = VerboseErrorKind::Context("incorrectly placed assertion context");
215    let errors = errors_for_labels_with_context(incorrectly_placed_contexts, parser_context);
216
217    if errors.is_empty() {
218        Ok(())
219    } else {
220        let errors = VerboseError { errors };
221        Err(ParseError { input, errors })
222    }
223}
224
225/// Auxiliary type alias: `IResult` defaults to `nom::error::Error` as concrete
226/// error type, but we want `nom::error::VerboseError` as it allows `context()`.
227type ParseResult<'input, Out> = IResult<&'input str, Out, VerboseError<&'input str>>;
228
229pub fn tokenize(s: &str) -> ParseResult<'_, Vec<InstructionToken<'_>>> {
230    let (s, _) = comment_or_whitespace0(s)?;
231    let (s, instructions) = many0(alt((
232        label,
233        labelled_instruction,
234        breakpoint,
235        type_hint,
236        assertion_context,
237    )))
238    .parse(s)?;
239    let (s, _) = context("expected label, instruction, or end of file", eof).parse(s)?;
240
241    Ok((s, instructions))
242}
243
244fn label(label_s: &str) -> ParseResult<'_, InstructionToken<'_>> {
245    let (s, addr) = label_addr(label_s)?;
246    let (s, _) = whitespace0(s)?; // whitespace between label and ':' is allowed
247    let (s, _) = token0(":")(s)?; // don't require space after ':'
248
249    // Checking if `<label>:` is an instruction must happen after parsing `:`.
250    // Otherwise, alternative parsers of `label` (like `labelled_instruction`)
251    // are rejected, even though valid instruction names *are* allowed there.
252    assert_label_is_legal(s, &addr)?;
253
254    Ok((s, InstructionToken::Label(addr, label_s)))
255}
256
257fn breakpoint(breakpoint_s: &str) -> ParseResult<'_, InstructionToken<'_>> {
258    let (s, _) = token1("break")(breakpoint_s)?;
259    Ok((s, InstructionToken::Breakpoint(breakpoint_s)))
260}
261
262fn labelled_instruction(s_instr: &str) -> ParseResult<'_, InstructionToken<'_>> {
263    let (s, instr) = an_instruction(s_instr)?;
264    Ok((s, InstructionToken::Instruction(instr, s_instr)))
265}
266
267fn an_instruction(s: &str) -> ParseResult<'_, AnInstruction<String>> {
268    // OpStack manipulation
269    let pop = pop_instruction();
270    let push = push_instruction();
271    let divine = divine_instruction();
272    let pick = pick_instruction();
273    let place = place_instruction();
274    let dup = dup_instruction();
275    let swap = swap_instruction();
276
277    let opstack_manipulation = alt((pop, push, divine, pick, place, dup, swap));
278
279    // Control flow
280    let halt = instruction("halt", AnInstruction::Halt);
281    let nop = instruction("nop", AnInstruction::Nop);
282    let skiz = instruction("skiz", AnInstruction::Skiz);
283    let call = call_instruction();
284    let return_ = instruction("return", AnInstruction::Return);
285    let recurse = instruction("recurse", AnInstruction::Recurse);
286    let recurse_or_return = instruction("recurse_or_return", AnInstruction::RecurseOrReturn);
287    let assert = instruction("assert", AnInstruction::Assert);
288
289    let control_flow = alt((nop, skiz, call, return_, halt));
290
291    // Memory access
292    let read_mem = read_mem_instruction();
293    let write_mem = write_mem_instruction();
294
295    let memory_access = alt((read_mem, write_mem));
296
297    // Hashing-related instructions
298    let hash = instruction("hash", AnInstruction::Hash);
299    let assert_vector = instruction("assert_vector", AnInstruction::AssertVector);
300    let sponge_init = instruction("sponge_init", AnInstruction::SpongeInit);
301    let sponge_absorb = instruction("sponge_absorb", AnInstruction::SpongeAbsorb);
302    let sponge_absorb_mem = instruction("sponge_absorb_mem", AnInstruction::SpongeAbsorbMem);
303    let sponge_squeeze = instruction("sponge_squeeze", AnInstruction::SpongeSqueeze);
304
305    let hashing_related = alt((hash, sponge_init, sponge_squeeze));
306
307    // Arithmetic on stack instructions
308    let add = instruction("add", AnInstruction::Add);
309    let addi = addi_instruction();
310    let mul = instruction("mul", AnInstruction::Mul);
311    let invert = instruction("invert", AnInstruction::Invert);
312    let eq = instruction("eq", AnInstruction::Eq);
313    let split = instruction("split", AnInstruction::Split);
314    let lt = instruction("lt", AnInstruction::Lt);
315    let and = instruction("and", AnInstruction::And);
316    let xor = instruction("xor", AnInstruction::Xor);
317    let log_2_floor = instruction("log_2_floor", AnInstruction::Log2Floor);
318    let pow = instruction("pow", AnInstruction::Pow);
319    let div_mod = instruction("div_mod", AnInstruction::DivMod);
320    let pop_count = instruction("pop_count", AnInstruction::PopCount);
321    let xx_add = instruction("xx_add", AnInstruction::XxAdd);
322    let xx_mul = instruction("xx_mul", AnInstruction::XxMul);
323    let x_invert = instruction("x_invert", AnInstruction::XInvert);
324    let xb_mul = instruction("xb_mul", AnInstruction::XbMul);
325
326    let base_field_arithmetic_on_stack = alt((mul, invert, eq));
327    let bitwise_arithmetic_on_stack =
328        alt((split, lt, and, xor, log_2_floor, pow, div_mod, pop_count));
329    let extension_field_arithmetic_on_stack = alt((xx_add, xx_mul, x_invert, xb_mul));
330    let arithmetic_on_stack = alt((
331        base_field_arithmetic_on_stack,
332        bitwise_arithmetic_on_stack,
333        extension_field_arithmetic_on_stack,
334    ));
335
336    // Read/write
337    let read_io = read_io_instruction();
338    let write_io = write_io_instruction();
339
340    let read_write = alt((read_io, write_io));
341
342    // Many-in-One
343    let merkle_step = instruction("merkle_step", AnInstruction::MerkleStep);
344    let merkle_step_mem = instruction("merkle_step_mem", AnInstruction::MerkleStepMem);
345    let xx_dot_step = instruction("xx_dot_step", AnInstruction::XxDotStep);
346    let xb_dot_step = instruction("xb_dot_step", AnInstruction::XbDotStep);
347
348    let many_to_one = alt((xx_dot_step, xb_dot_step));
349
350    // Because of common prefixes, the following parsers are sensitive to order.
351    // Successfully parsing "assert" before trying "assert_vector" can lead to
352    // picking the wrong one. By trying them in the order of longest first, less
353    // backtracking is necessary.
354    let syntax_ambiguous = alt((
355        recurse_or_return,
356        recurse,
357        assert_vector,
358        assert,
359        sponge_absorb_mem,
360        sponge_absorb,
361        addi,
362        add,
363        merkle_step_mem,
364        merkle_step,
365    ));
366
367    alt((
368        opstack_manipulation,
369        control_flow,
370        memory_access,
371        hashing_related,
372        arithmetic_on_stack,
373        read_write,
374        many_to_one,
375        syntax_ambiguous,
376    ))
377    .parse(s)
378}
379
380fn assert_label_is_legal<'s>(
381    s: &'s str,
382    label: &str,
383) -> Result<(), nom::Err<VerboseError<&'s str>>> {
384    if ALL_INSTRUCTION_NAMES.contains(&label) || KEYWORDS.contains(&label) {
385        let fail_context = "label must be neither instruction nor keyword";
386        let errors = vec![(s, VerboseErrorKind::Context(fail_context))];
387        return Err(nom::Err::Failure(VerboseError { errors }));
388    }
389
390    Ok(())
391}
392
393fn instruction<'a>(
394    name: &'a str,
395    instruction: AnInstruction<String>,
396) -> impl Fn(&'a str) -> ParseResult<'a, AnInstruction<String>> {
397    move |s: &'a str| {
398        let (s, _) = token1(name)(s)?;
399        Ok((s, instruction.clone()))
400    }
401}
402
403fn pop_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
404    move |s: &str| {
405        let (s, _) = token1("pop")(s)?;
406        let (s, arg) = cut(number_of_words).parse(s)?;
407        Ok((s, AnInstruction::Pop(arg)))
408    }
409}
410
411fn push_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
412    move |s: &str| {
413        let (s, _) = token1("push")(s)?;
414        let (s, elem) = cut(field_element).parse(s)?;
415        Ok((s, AnInstruction::Push(elem)))
416    }
417}
418
419fn addi_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
420    move |s: &str| {
421        let (s, _) = token1("addi")(s)?;
422        let (s, elem) = cut(field_element).parse(s)?;
423        Ok((s, AnInstruction::AddI(elem)))
424    }
425}
426
427fn divine_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
428    move |s: &str| {
429        let (s, _) = token1("divine")(s)?;
430        let (s, arg) = cut(number_of_words).parse(s)?;
431        Ok((s, AnInstruction::Divine(arg)))
432    }
433}
434
435fn pick_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
436    move |s: &str| {
437        let (s, _) = token1("pick")(s)?;
438        let (s, arg) = cut(stack_register).parse(s)?;
439        Ok((s, AnInstruction::Pick(arg)))
440    }
441}
442
443fn place_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
444    move |s: &str| {
445        let (s, _) = token1("place")(s)?;
446        let (s, arg) = cut(stack_register).parse(s)?;
447        Ok((s, AnInstruction::Place(arg)))
448    }
449}
450
451fn dup_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
452    move |s: &str| {
453        let (s, _) = token1("dup")(s)?; // require space before argument
454        let (s, stack_register) = cut(stack_register).parse(s)?;
455        Ok((s, AnInstruction::Dup(stack_register)))
456    }
457}
458
459fn swap_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
460    move |s: &str| {
461        let (s, _) = token1("swap")(s)?;
462        let (s, stack_register) = cut(stack_register).parse(s)?;
463        Ok((s, AnInstruction::Swap(stack_register)))
464    }
465}
466
467fn call_instruction<'a>() -> impl Fn(&'a str) -> ParseResult<'a, AnInstruction<String>> {
468    move |s: &'a str| {
469        let (s, _) = token1("call")(s)?;
470        let (s, label) = cut(label_addr).parse(s)?;
471        let (s, _) = comment_or_whitespace1(s)?;
472
473        // This check cannot be moved into `label_addr`, since `label_addr` is
474        // shared between the scenarios `<label>:` and `call <label>`; the
475        // former requires parsing the `:` before rejecting a possible
476        // instruction name in the label.
477        assert_label_is_legal(s, &label)?;
478
479        Ok((s, AnInstruction::Call(label)))
480    }
481}
482
483fn read_mem_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
484    move |s: &str| {
485        let (s, _) = token1("read_mem")(s)?;
486        let (s, arg) = cut(number_of_words).parse(s)?;
487        Ok((s, AnInstruction::ReadMem(arg)))
488    }
489}
490
491fn write_mem_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
492    move |s: &str| {
493        let (s, _) = token1("write_mem")(s)?;
494        let (s, arg) = cut(number_of_words).parse(s)?;
495        Ok((s, AnInstruction::WriteMem(arg)))
496    }
497}
498
499fn read_io_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
500    move |s: &str| {
501        let (s, _) = token1("read_io")(s)?;
502        let (s, arg) = cut(number_of_words).parse(s)?;
503        Ok((s, AnInstruction::ReadIo(arg)))
504    }
505}
506
507fn write_io_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
508    move |s: &str| {
509        let (s, _) = token1("write_io")(s)?;
510        let (s, arg) = cut(number_of_words).parse(s)?;
511        Ok((s, AnInstruction::WriteIo(arg)))
512    }
513}
514
515fn field_element(s_orig: &str) -> ParseResult<'_, BFieldElement> {
516    let (s, n) = context(
517        "expected a reasonably-sized integer",
518        nom::character::complete::i128,
519    )
520    .parse(s_orig)?;
521    let (s, _) = comment_or_whitespace1(s)?;
522
523    let p = i128::from(BFieldElement::P);
524    if n <= -p || p <= n {
525        let fail_context = "out-of-bounds constant";
526        let errors = vec![(s, VerboseErrorKind::Context(fail_context))];
527        return Err(nom::Err::Error(VerboseError { errors }));
528    }
529    let n = if n >= 0 { n } else { n + p };
530
531    Ok((s, BFieldElement::new(n as u64)))
532}
533
534fn stack_register(s: &str) -> ParseResult<'_, OpStackElement> {
535    let (s, n) = digit1(s)?;
536    let stack_register = match n {
537        "0" => OpStackElement::ST0,
538        "1" => OpStackElement::ST1,
539        "2" => OpStackElement::ST2,
540        "3" => OpStackElement::ST3,
541        "4" => OpStackElement::ST4,
542        "5" => OpStackElement::ST5,
543        "6" => OpStackElement::ST6,
544        "7" => OpStackElement::ST7,
545        "8" => OpStackElement::ST8,
546        "9" => OpStackElement::ST9,
547        "10" => OpStackElement::ST10,
548        "11" => OpStackElement::ST11,
549        "12" => OpStackElement::ST12,
550        "13" => OpStackElement::ST13,
551        "14" => OpStackElement::ST14,
552        "15" => OpStackElement::ST15,
553        _ => {
554            let fail_context = "using an out-of-bounds stack register (0-15 exist)";
555            let errors = vec![(s, VerboseErrorKind::Context(fail_context))];
556            return Err(nom::Err::Error(VerboseError { errors }));
557        }
558    };
559    let (s, _) = comment_or_whitespace1(s)?;
560
561    Ok((s, stack_register))
562}
563
564fn number_of_words(s: &str) -> ParseResult<'_, NumberOfWords> {
565    let (s, n) = digit1(s)?;
566    let arg = match n {
567        "1" => NumberOfWords::N1,
568        "2" => NumberOfWords::N2,
569        "3" => NumberOfWords::N3,
570        "4" => NumberOfWords::N4,
571        "5" => NumberOfWords::N5,
572        _ => {
573            let fail_context = "using an out-of-bounds argument (1-5 allowed)";
574            let errors = vec![(s, VerboseErrorKind::Context(fail_context))];
575            return Err(nom::Err::Error(VerboseError { errors }));
576        }
577    };
578    let (s, _) = comment_or_whitespace1(s)?; // require space after element
579
580    Ok((s, arg))
581}
582
583/// Parse a label address. This is used in "`<label>:`" and in "`call <label>`".
584fn label_addr(s_orig: &str) -> ParseResult<'_, String> {
585    let (s, addr_part_0) = context(
586        "label must start with an alphabetic character or `_`",
587        take_while1(|c: char| c.is_alphabetic() || c == '_'),
588    )
589    .parse(s_orig)?;
590    let (s, addr_part_1) = context(
591        "label must contain only alphanumeric characters or `_` or `-`",
592        take_while(|c: char| c.is_alphanumeric() || c == '_' || c == '-'),
593    )
594    .parse(s)?;
595
596    Ok((s, format!("{addr_part_0}{addr_part_1}")))
597}
598
599/// Parse 0 or more comments and/or whitespace.
600///
601/// This is used in places where whitespace is optional (e.g. after ':').
602fn comment_or_whitespace0(s: &str) -> ParseResult<'_, &str> {
603    let (s, _) = many0(alt((comment1, whitespace1))).parse(s)?;
604    Ok((s, ""))
605}
606
607/// Parse at least one comment and/or whitespace, or [eof].
608///
609/// This is used in places where whitespace is mandatory (e.g. after tokens).
610fn comment_or_whitespace1<'a>(s: &'a str) -> ParseResult<'a, &'a str> {
611    let cws1 = |s: &'a str| -> ParseResult<&str> {
612        let (s, _) = many1(alt((comment1, whitespace1))).parse(s)?;
613        Ok((s, ""))
614    };
615    alt((eof, cws1)).parse(s)
616}
617
618/// Parse one comment
619fn comment1(s: &str) -> ParseResult<'_, ()> {
620    alt((eol_comment, inline_comment)).parse(s)
621}
622
623/// Parse one “//”-comment (not including the linebreak)
624fn eol_comment(s: &str) -> ParseResult<'_, ()> {
625    let (s, _) = tag("//").parse(s)?;
626    let (s, _) = is_not("\n\r").parse(s)?;
627
628    Ok((s, ()))
629}
630
631/// Parse one “/* … */”-comment
632fn inline_comment(s: &str) -> ParseResult<'_, ()> {
633    let (s, _) = tag("/*").parse(s)?;
634    let (s, _) = take_until("*/").parse(s)?;
635    let (s, _) = tag("*/").parse(s)?;
636
637    Ok((s, ()))
638}
639
640/// Parse whitespace characters (can be none)
641fn whitespace0(s: &str) -> ParseResult<'_, ()> {
642    let (s, _) = take_while(char::is_whitespace)(s)?;
643    Ok((s, ()))
644}
645
646/// Parse at least one whitespace character
647fn whitespace1(s: &str) -> ParseResult<'_, ()> {
648    let (s, _) = take_while1(char::is_whitespace)(s)?;
649    Ok((s, ()))
650}
651
652/// `token0(tok)` will parse the string `tok` and munch 0 or more comment or
653/// whitespace.
654fn token0<'a>(token: &'a str) -> impl Fn(&'a str) -> ParseResult<'a, ()> {
655    move |s: &'a str| {
656        let (s, _) = tag(token)(s)?;
657        let (s, _) = comment_or_whitespace0(s)?;
658        Ok((s, ()))
659    }
660}
661
662/// `token1(tok)` will parse the string `tok` and munch at least one comment
663/// and/or whitespace, or eof.
664fn token1<'a>(token: &'a str) -> impl Fn(&'a str) -> ParseResult<'a, ()> {
665    move |s: &'a str| {
666        let (s, _) = tag(token)(s)?;
667        let (s, _) = comment_or_whitespace1(s)?;
668        Ok((s, ()))
669    }
670}
671
672/// Parse one type hint.
673///
674/// Type hints look like this:
675///
676/// ```text
677/// hint <variable_name>[: <type_name>] = stack\[<range_start>[..<range_end>]\]
678/// ```
679fn type_hint(s_type_hint: &str) -> ParseResult<'_, InstructionToken<'_>> {
680    let (s, _) = token1("hint")(s_type_hint)?;
681    let (s, variable_name) = context(
682        "type hint requires variable's name",
683        cut(type_hint_variable_name),
684    )
685    .parse(s)?;
686    let (s, type_name) = context(
687        "type hint's type name is malformed",
688        cut(type_hint_type_name),
689    )
690    .parse(s)?;
691    let (s, _) = context("syntax error", cut(token0("="))).parse(s)?;
692    let (s, _) = context("syntax error", cut(tag("stack"))).parse(s)?;
693    let (s, _) = context("syntax error", cut(token0("["))).parse(s)?;
694    let (s, range_start) = context(
695        "type hint requires a stack position (or range) indicator",
696        cut(nom::character::complete::usize),
697    )
698    .parse(s)?;
699    let (s, _) = whitespace0(s)?;
700    let (s, maybe_range_end) = context(
701        "type hint's stack range indicator is malformed",
702        cut(type_hint_ending_index),
703    )
704    .parse(s)?;
705    let (s, _) = whitespace0(s)?;
706    let (s, _) = context(
707        "type hint requires closing bracket for stack position (or range) indicator",
708        cut(token0("]")),
709    )
710    .parse(s)?;
711
712    let length = match maybe_range_end {
713        Some(range_end) if range_end <= range_start => {
714            let fail_context = "type hint's stack range indicator must be non-empty";
715            let errors = vec![(s, VerboseErrorKind::Context(fail_context))];
716            return Err(nom::Err::Failure(VerboseError { errors }));
717        }
718        Some(range_end) => range_end - range_start,
719        None => 1,
720    };
721
722    let type_hint = TypeHint {
723        starting_index: range_start,
724        length,
725        type_name,
726        variable_name,
727    };
728
729    Ok((s, InstructionToken::TypeHint(type_hint, s_type_hint)))
730}
731
732fn type_hint_variable_name(s: &str) -> ParseResult<'_, String> {
733    let (s, variable_name_start) = context(
734        "type hint's variable name must start with a lowercase alphabetic character or `_`",
735        take_while1(|c: char| (c.is_alphabetic() && c.is_lowercase()) || c == '_'),
736    )
737    .parse(s)?;
738    let (s, variable_name_end) = context(
739        "type hint's variable name must contain only lowercase alphanumeric characters or `_`",
740        take_while(|c: char| (c.is_alphabetic() && c.is_lowercase()) || c == '_' || c.is_numeric()),
741    )
742    .parse(s)?;
743    let (s, _) = whitespace0(s)?;
744    let variable_name = format!("{variable_name_start}{variable_name_end}");
745    Ok((s, variable_name))
746}
747
748fn type_hint_type_name(s: &str) -> ParseResult<'_, Option<String>> {
749    #[derive(Debug, Clone)]
750    pub enum TypeHintType<'a> {
751        Simple(&'a str),
752        Generic(&'a str, Vec<TypeHintType<'a>>),
753    }
754
755    impl Display for TypeHintType<'_> {
756        fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
757            match self {
758                TypeHintType::Simple(ty) => write!(f, "{ty}"),
759                TypeHintType::Generic(ty, ge) => write!(f, "{ty}<{ge}>", ge = ge.iter().join(", ")),
760            }
761        }
762    }
763
764    fn ty(input: &str) -> ParseResult<'_, TypeHintType<'_>> {
765        alt((generic_ty, simple_ty)).parse(input)
766    }
767
768    fn simple_ty(s: &str) -> ParseResult<'_, TypeHintType<'_>> {
769        let (s, ty) = ty_name(s)?;
770        Ok((s, TypeHintType::Simple(ty)))
771    }
772
773    fn generic_ty(s: &str) -> ParseResult<'_, TypeHintType<'_>> {
774        let (s, name) = ty_name(s)?;
775        let (s, _) = comment_or_whitespace0(s)?;
776
777        let list_elem = delimited(comment_or_whitespace0, ty, comment_or_whitespace0);
778        let separator = || delimited(comment_or_whitespace0, char(','), comment_or_whitespace0);
779        let ty_list = separated_list1(separator(), list_elem);
780        let ty_list = terminated(ty_list, opt(separator()));
781        let empty_list = value(Vec::new(), comment_or_whitespace1);
782        let (s, generics) = delimited(tag("<"), alt((ty_list, empty_list)), tag(">")).parse(s)?;
783
784        let ty = if generics.is_empty() {
785            TypeHintType::Simple(name)
786        } else {
787            TypeHintType::Generic(name, generics)
788        };
789
790        Ok((s, ty))
791    }
792
793    fn ty_name(input: &str) -> ParseResult<'_, &str> {
794        let (s, stars) = take_while(|c: char| c == '*').parse(input)?;
795        let (s, begin) = take_while1(|c: char| c.is_ascii_alphabetic() || c == '_').parse(s)?;
796        let (s, rest) = take_while(|c: char| c.is_ascii_alphanumeric() || c == '_').parse(s)?;
797
798        Ok((s, &input[..stars.len() + begin.len() + rest.len()]))
799    }
800
801    let Ok((s, _)) = token0(":")(s) else {
802        return Ok((s, None));
803    };
804
805    let (s, type_name) = cut(ty).parse(s)?;
806    let (s, _) = whitespace0(s)?;
807
808    Ok((s, Some(type_name.to_string())))
809}
810
811fn type_hint_ending_index(s: &str) -> ParseResult<'_, Option<usize>> {
812    let Ok((s, _)) = token0("..")(s) else {
813        return Ok((s, None));
814    };
815    let (s, range_end) = context(
816        "type hint with range indicator requires a range end",
817        cut(nom::character::complete::usize),
818    )
819    .parse(s)?;
820    let (s, _) = whitespace0(s)?;
821
822    Ok((s, Some(range_end)))
823}
824
825fn assertion_context(s_ctx: &str) -> ParseResult<'_, InstructionToken<'_>> {
826    let (s, assertion_context) = assertion_context_id(s_ctx)?;
827    let assertion_context = InstructionToken::AssertionContext(assertion_context, s_ctx);
828
829    Ok((s, assertion_context))
830}
831
832fn assertion_context_id(s_ctx: &str) -> ParseResult<'_, AssertionContext> {
833    let (s, _) = token1("error_id")(s_ctx)?;
834    let (s, id) = context(
835        "expected a valid error ID of type i128",
836        cut(nom::character::complete::i128),
837    )
838    .parse(s)?;
839    let (s, _) = comment_or_whitespace1(s)?;
840
841    let assertion_context = AssertionContext::ID(id);
842    Ok((s, assertion_context))
843}
844
845pub(crate) fn build_label_to_address_map(program: &[LabelledInstruction]) -> HashMap<String, u64> {
846    let mut label_map = HashMap::new();
847    let mut instruction_pointer = 0;
848
849    for labelled_instruction in program {
850        if let LabelledInstruction::Instruction(instruction) = labelled_instruction {
851            instruction_pointer += instruction.size() as u64;
852            continue;
853        }
854
855        let LabelledInstruction::Label(label) = labelled_instruction else {
856            continue;
857        };
858        let Entry::Vacant(new_label_map_entry) = label_map.entry(label.clone()) else {
859            panic!("Duplicate label: {label}");
860        };
861        new_label_map_entry.insert(instruction_pointer);
862    }
863
864    label_map
865}
866
867pub(crate) fn turn_labels_into_addresses(
868    labelled_instructions: &[LabelledInstruction],
869    label_to_address: &HashMap<String, u64>,
870) -> Vec<Instruction> {
871    fn turn_label_to_address_for_instruction(
872        labelled_instruction: &LabelledInstruction,
873        label_map: &HashMap<String, u64>,
874    ) -> Option<Instruction> {
875        let LabelledInstruction::Instruction(instruction) = labelled_instruction else {
876            return None;
877        };
878
879        let instruction_with_absolute_address =
880            instruction.map_call_address(|label| address_for_label(label, label_map));
881        Some(instruction_with_absolute_address)
882    }
883
884    fn address_for_label(label: &str, label_map: &HashMap<String, u64>) -> BFieldElement {
885        let maybe_address = label_map.get(label).map(|&a| bfe!(a));
886        maybe_address.unwrap_or_else(|| panic!("Label not found: {label}"))
887    }
888
889    labelled_instructions
890        .iter()
891        .filter_map(|inst| turn_label_to_address_for_instruction(inst, label_to_address))
892        .flat_map(|inst| vec![inst; inst.size()])
893        .collect()
894}
895
896#[cfg(test)]
897#[cfg_attr(coverage_nightly, coverage(off))]
898pub(crate) mod tests {
899    use assert2::assert;
900    use assert2::let_assert;
901    use itertools::Itertools;
902    use proptest::prelude::*;
903    use proptest_arbitrary_interop::arb;
904    use rand::Rng;
905    use strum::EnumCount;
906    use test_strategy::Arbitrary;
907    use test_strategy::proptest;
908    use twenty_first::bfe;
909    use twenty_first::prelude::Digest;
910
911    use crate::triton_asm;
912    use crate::triton_instr;
913    use crate::triton_program;
914
915    use super::*;
916
917    #[must_use]
918    struct TestCase<'a> {
919        input: &'a str,
920        expected: Vec<Instruction>,
921        message: &'static str,
922    }
923
924    #[must_use]
925    struct NegativeTestCase<'a> {
926        input: &'a str,
927        expected_error: &'static str,
928        expected_error_count: usize,
929        message: &'static str,
930    }
931
932    impl TestCase<'_> {
933        fn run(&self) {
934            let message = self.message;
935            let parse_result = parse(self.input).map_err(|err| format!("{message}:\n{err}"));
936            let_assert!(Ok(actual) = parse_result);
937
938            let labelled_instructions = to_labelled_instructions(&actual);
939            let label_to_address = build_label_to_address_map(&labelled_instructions);
940            let instructions =
941                turn_labels_into_addresses(&labelled_instructions, &label_to_address);
942            assert!(self.expected == instructions, "{message}");
943        }
944    }
945
946    impl NegativeTestCase<'_> {
947        fn run(self) {
948            let Self {
949                input,
950                expected_error,
951                expected_error_count,
952                message,
953            } = self;
954
955            let parse_result = parse(input);
956            if let Ok(instructions) = parse_result {
957                eprintln!("parser input: {input}");
958                eprintln!("parser output: {instructions:?}");
959                panic!("parser should fail, but didn't: {message}");
960            }
961
962            let actual_error = parse_result.map_err(|e| e.to_string()).unwrap_err();
963            let actual_error_count = actual_error.match_indices(expected_error).count();
964            if expected_error_count != actual_error_count {
965                eprintln!("Expected error message ({expected_error_count} times):");
966                eprintln!("{expected_error}");
967                eprintln!("Actual error message (found {actual_error_count} times):");
968                eprintln!("{actual_error}");
969                panic!("Additional context: {message}");
970            }
971        }
972    }
973
974    #[test]
975    fn parse_program_empty() {
976        TestCase {
977            input: "",
978            expected: vec![],
979            message: "empty string should parse as empty program",
980        }
981        .run();
982
983        TestCase {
984            input: "   ",
985            expected: vec![],
986            message: "spaces should parse as empty program",
987        }
988        .run();
989
990        TestCase {
991            input: "\n",
992            expected: vec![],
993            message: "linebreaks should parse as empty program (1)",
994        }
995        .run();
996
997        TestCase {
998            input: "   \n ",
999            expected: vec![],
1000            message: "linebreaks should parse as empty program (2)",
1001        }
1002        .run();
1003
1004        TestCase {
1005            input: "   \n \n",
1006            expected: vec![],
1007            message: "linebreaks should parse as empty program (3)",
1008        }
1009        .run();
1010
1011        TestCase {
1012            input: "// empty program",
1013            expected: vec![],
1014            message: "single comment should parse as empty program",
1015        }
1016        .run();
1017
1018        TestCase {
1019            input: "// empty program\n",
1020            expected: vec![],
1021            message: "single comment with linebreak should parse as empty program",
1022        }
1023        .run();
1024
1025        TestCase {
1026            input: "// multi-line\n// comment",
1027            expected: vec![],
1028            message: "multiple comments should parse as empty program",
1029        }
1030        .run();
1031
1032        TestCase {
1033            input: "// multi-line\n// comment\n ",
1034            expected: vec![],
1035            message: "multiple comments with trailing whitespace should parse as empty program",
1036        }
1037        .run();
1038    }
1039
1040    #[test]
1041    fn parse_simple_programs() {
1042        TestCase {
1043            input: "halt",
1044            expected: vec![Instruction::Halt],
1045            message: "most simple program should work",
1046        }
1047        .run();
1048
1049        TestCase {
1050            input: "push -1 push 0 push 1",
1051            expected: [-1, -1, 0, 0, 1, 1]
1052                .map(|i| Instruction::Push(bfe!(i)))
1053                .to_vec(),
1054            message: "simple parameters for push should work",
1055        }
1056        .run();
1057    }
1058
1059    #[proptest]
1060    fn arbitrary_whitespace_and_comment_sequence_is_empty_program(whitespace: Vec<Whitespace>) {
1061        let whitespace = whitespace.into_iter().join("");
1062        TestCase {
1063            input: &whitespace,
1064            expected: vec![],
1065            message: "arbitrary whitespace should parse as empty program",
1066        }
1067        .run();
1068    }
1069
1070    #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, EnumCount, Arbitrary)]
1071    enum Whitespace {
1072        Space,
1073        Tab,
1074        CarriageReturn,
1075        LineFeed,
1076        CarriageReturnLineFeed,
1077        Comment,
1078    }
1079
1080    impl Display for Whitespace {
1081        fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
1082            match self {
1083                Self::Space => write!(f, " "),
1084                Self::Tab => write!(f, "\t"),
1085                Self::CarriageReturn => write!(f, "\r"),
1086                Self::LineFeed => writeln!(f),
1087                Self::CarriageReturnLineFeed => writeln!(f, "\r"),
1088                Self::Comment => writeln!(f, " // comment"),
1089            }
1090        }
1091    }
1092
1093    #[test]
1094    fn parse_program_whitespace() {
1095        NegativeTestCase {
1096            input: "poppop",
1097            expected_error: "n/a",
1098            expected_error_count: 0,
1099            message: "whitespace required between instructions (pop)",
1100        }
1101        .run();
1102
1103        NegativeTestCase {
1104            input: "dup 0dup 0",
1105            expected_error: "n/a",
1106            expected_error_count: 0,
1107            message: "whitespace required between instructions (dup 0)",
1108        }
1109        .run();
1110
1111        NegativeTestCase {
1112            input: "swap 10swap 10",
1113            expected_error: "n/a",
1114            expected_error_count: 0,
1115            message: "whitespace required between instructions (swap 10)",
1116        }
1117        .run();
1118
1119        NegativeTestCase {
1120            input: "push10",
1121            expected_error: "n/a",
1122            expected_error_count: 0,
1123            message: "push requires whitespace before its constant",
1124        }
1125        .run();
1126
1127        NegativeTestCase {
1128            input: "push 10pop",
1129            expected_error: "n/a",
1130            expected_error_count: 0,
1131            message: "push requires whitespace after its constant",
1132        }
1133        .run();
1134
1135        NegativeTestCase {
1136            input: "hello: callhello",
1137            expected_error: "n/a",
1138            expected_error_count: 0,
1139            message: "call requires whitespace before its label",
1140        }
1141        .run();
1142
1143        NegativeTestCase {
1144            input: "hello: popcall hello",
1145            expected_error: "n/a",
1146            expected_error_count: 0,
1147            message: "required space between pop and call",
1148        }
1149        .run();
1150    }
1151
1152    #[test]
1153    fn parse_program_label() {
1154        TestCase {
1155            input: "foo: call foo",
1156            expected: vec![Instruction::Call(bfe!(0)), Instruction::Call(bfe!(0))],
1157            message: "parse labels and calls to labels",
1158        }
1159        .run();
1160
1161        TestCase {
1162            input: "foo:call foo",
1163            expected: vec![Instruction::Call(bfe!(0)), Instruction::Call(bfe!(0))],
1164            message: "whitespace is not required after 'label:'",
1165        }
1166        .run();
1167
1168        // FIXME: Increase coverage of negative tests for duplicate labels.
1169        NegativeTestCase {
1170            input: "foo: pop 1 foo: pop 1 call foo",
1171            expected_error: "duplicate label",
1172            expected_error_count: 2,
1173            message: "labels cannot occur twice",
1174        }
1175        .run();
1176
1177        // FIXME: Increase coverage of negative tests for missing labels.
1178        NegativeTestCase {
1179            input: "foo: pop 1 call herp call derp",
1180            expected_error: "missing label",
1181            expected_error_count: 2,
1182            message: "non-existent labels cannot be called",
1183        }
1184        .run();
1185
1186        // FIXME: Increase coverage of negative tests for label/keyword overlap.
1187        NegativeTestCase {
1188            input: "pop: call pop",
1189            expected_error: "label must be neither instruction nor keyword",
1190            expected_error_count: 1,
1191            message: "label names may not overlap with instruction names",
1192        }
1193        .run();
1194
1195        TestCase {
1196            input: "pops: call pops",
1197            expected: vec![Instruction::Call(bfe!(0)), Instruction::Call(bfe!(0))],
1198            message: "labels that share a common prefix with instruction are labels",
1199        }
1200        .run();
1201
1202        TestCase {
1203            input: "_call: call _call",
1204            expected: vec![Instruction::Call(bfe!(0)), Instruction::Call(bfe!(0))],
1205            message: "labels that share a common suffix with instruction are labels",
1206        }
1207        .run();
1208    }
1209
1210    #[test]
1211    fn parse_program_nonexistent_instructions() {
1212        NegativeTestCase {
1213            input: "pop 0",
1214            expected_error: "using an out-of-bounds argument (1-5 allowed)",
1215            expected_error_count: 1,
1216            message: "instruction `pop` cannot take argument `0`",
1217        }
1218        .run();
1219
1220        NegativeTestCase {
1221            input: "swap 16",
1222            expected_error: "using an out-of-bounds stack register (0-15 exist)",
1223            expected_error_count: 1,
1224            message: "there is no swap 16 instruction",
1225        }
1226        .run();
1227
1228        NegativeTestCase {
1229            input: "dup 16",
1230            expected_error: "using an out-of-bounds stack register (0-15 exist)",
1231            expected_error_count: 1,
1232            message: "there is no dup 16 instruction",
1233        }
1234        .run();
1235    }
1236
1237    #[test]
1238    fn parse_program_bracket_syntax() {
1239        NegativeTestCase {
1240            input: "foo: [foo]",
1241            expected_error: "expected label, instruction, or end of file",
1242            expected_error_count: 1,
1243            message: "brackets as call syntax sugar are unsupported",
1244        }
1245        .run();
1246
1247        NegativeTestCase {
1248            input: "foo: [bar]",
1249            expected_error: "expected label, instruction, or end of file",
1250            expected_error_count: 1,
1251            message: "brackets as call syntax sugar are unsupported",
1252        }
1253        .run();
1254    }
1255
1256    #[test]
1257    fn parse_program_label_must_start_with_alphabetic_character_or_underscore() {
1258        NegativeTestCase {
1259            input: "1foo: call 1foo",
1260            expected_error: "expected label, instruction, or end of file",
1261            expected_error_count: 1,
1262            message: "labels cannot start with a digit",
1263        }
1264        .run();
1265
1266        NegativeTestCase {
1267            input: "-foo: call -foo",
1268            expected_error: "expected label, instruction, or end of file",
1269            expected_error_count: 1,
1270            message: "labels cannot start with a dash",
1271        }
1272        .run();
1273
1274        NegativeTestCase {
1275            input: "call -foo",
1276            expected_error: "label must start with an alphabetic character or `_`",
1277            expected_error_count: 1,
1278            message: "labels cannot start with a dash",
1279        }
1280        .run();
1281
1282        TestCase {
1283            input: "_foo: call _foo",
1284            expected: vec![Instruction::Call(bfe!(0)), Instruction::Call(bfe!(0))],
1285            message: "labels can start with an underscore",
1286        }
1287        .run();
1288    }
1289
1290    #[test]
1291    fn parse_type_hint_with_zero_length_range() {
1292        NegativeTestCase {
1293            input: "hint foo: Type = stack[0..0]",
1294            expected_error: "type hint's stack range indicator must be non-empty",
1295            expected_error_count: 1,
1296            message: "parse type hint with zero-length range",
1297        }
1298        .run();
1299    }
1300
1301    #[test]
1302    fn parse_type_hint_with_range_and_offset_and_missing_closing_bracket() {
1303        NegativeTestCase {
1304            input: "hint foo: Type = stack[2..5",
1305            expected_error: "type hint requires closing bracket",
1306            expected_error_count: 1,
1307            message: "parse type hint with range and offset and missing closing bracket",
1308        }
1309        .run();
1310    }
1311
1312    #[test]
1313    fn parse_type_hint_with_range_and_offset_and_missing_opening_bracket() {
1314        NegativeTestCase {
1315            input: "hint foo: Type = stack2..5]",
1316            expected_error: "syntax error",
1317            expected_error_count: 1,
1318            message: "parse type hint with range and offset and missing opening bracket",
1319        }
1320        .run();
1321    }
1322
1323    #[test]
1324    fn parse_type_hint_with_range_and_offset_and_missing_equals_sign() {
1325        NegativeTestCase {
1326            input: "hint foo: Type stack[2..5];",
1327            expected_error: "syntax error",
1328            expected_error_count: 1,
1329            message: "parse type hint with range and offset and missing equals sign",
1330        }
1331        .run();
1332    }
1333
1334    #[test]
1335    fn parse_type_hint_with_range_and_offset_and_missing_type_name() {
1336        NegativeTestCase {
1337            input: "hint foo: = stack[2..5]",
1338            expected_error: "type hint's type name is malformed",
1339            expected_error_count: 1,
1340            message: "parse type hint with range and offset and missing type name",
1341        }
1342        .run();
1343    }
1344
1345    #[test]
1346    fn parse_type_hint_with_range_and_offset_and_missing_variable_name() {
1347        NegativeTestCase {
1348            input: "hint : Type = stack[2..5]",
1349            expected_error: "label must be neither instruction nor keyword",
1350            expected_error_count: 1,
1351            message: "parse type hint with range and offset and missing variable name",
1352        }
1353        .run();
1354    }
1355
1356    #[test]
1357    fn parse_type_hint_with_range_and_offset_and_missing_colon() {
1358        NegativeTestCase {
1359            input: "hint foo Type = stack[2..5]",
1360            expected_error: "syntax error",
1361            expected_error_count: 1,
1362            message: "parse type hint with range and offset and missing colon",
1363        }
1364        .run();
1365    }
1366
1367    #[test]
1368    fn parse_type_hint_with_range_and_offset_and_missing_hint() {
1369        NegativeTestCase {
1370            input: "foo: Type = stack[2..5];",
1371            expected_error: "expected label, instruction, or end of file",
1372            expected_error_count: 1,
1373            message: "parse type hint with range and offset and missing hint",
1374        }
1375        .run();
1376    }
1377
1378    #[test]
1379    fn parse_simple_assertion_contexts() {
1380        TestCase {
1381            input: "assert",
1382            expected: vec![Instruction::Assert],
1383            message: "naked assert",
1384        }
1385        .run();
1386        TestCase {
1387            input: "assert_vector",
1388            expected: vec![Instruction::AssertVector],
1389            message: "naked assert_vector",
1390        }
1391        .run();
1392
1393        TestCase {
1394            input: "assert error_id 42",
1395            expected: vec![Instruction::Assert],
1396            message: "assert, then id",
1397        }
1398        .run();
1399        TestCase {
1400            input: "assert_vector error_id 42",
1401            expected: vec![Instruction::AssertVector],
1402            message: "assert_vector, then id",
1403        }
1404        .run();
1405
1406        TestCase {
1407            input: "assert error_id -42",
1408            expected: vec![Instruction::Assert],
1409            message: "assert, then negative id",
1410        }
1411        .run();
1412        TestCase {
1413            input: "assert_vector error_id -42",
1414            expected: vec![Instruction::AssertVector],
1415            message: "assert_vector, then negative id",
1416        }
1417        .run();
1418
1419        TestCase {
1420            input: "assert error_id 42 nop",
1421            expected: vec![Instruction::Assert, Instruction::Nop],
1422            message: "assert, then id, then nop",
1423        }
1424        .run();
1425        TestCase {
1426            input: "assert_vector error_id 42 nop",
1427            expected: vec![Instruction::AssertVector, Instruction::Nop],
1428            message: "assert_vector, then id, then nop",
1429        }
1430        .run();
1431    }
1432
1433    #[test]
1434    fn assertion_context_error_id_can_handle_edge_case_ids() {
1435        let instructions = [
1436            ("assert", Instruction::Assert),
1437            ("assert_vector", Instruction::AssertVector),
1438        ];
1439        let ids = [i128::MIN, -1, 0, 1, i128::MAX];
1440
1441        for ((instruction_str, instruction), id) in instructions.into_iter().cartesian_product(ids)
1442        {
1443            TestCase {
1444                input: &format!("{instruction_str} error_id {id}"),
1445                expected: vec![instruction],
1446                message: "assert, then edge case id",
1447            }
1448            .run();
1449        }
1450    }
1451
1452    #[test]
1453    fn assertion_context_error_id_fails_on_too_large_error_ids() {
1454        NegativeTestCase {
1455            input: "assert error_id -170141183460469231731687303715884105729",
1456            expected_error: "expected a valid error ID of type i128",
1457            expected_error_count: 1,
1458            message: "error id smaller than i128::MIN",
1459        }
1460        .run();
1461        NegativeTestCase {
1462            input: "assert error_id 170141183460469231731687303715884105728",
1463            expected_error: "expected a valid error ID of type i128",
1464            expected_error_count: 1,
1465            message: "error id larger than i128::MAX",
1466        }
1467        .run();
1468    }
1469
1470    #[proptest]
1471    fn assertion_context_error_id_can_handle_any_valid_id(id: i128) {
1472        TestCase {
1473            input: &format!("assert error_id {id}"),
1474            expected: vec![Instruction::Assert],
1475            message: "assert, then random id",
1476        }
1477        .run();
1478    }
1479
1480    #[test]
1481    fn parse_erroneous_assertion_contexts_for_assert() {
1482        NegativeTestCase {
1483            input: "assert error_id",
1484            expected_error: "expected a valid error ID of type i128",
1485            expected_error_count: 1,
1486            message: "missing id after keyword `error_id`",
1487        }
1488        .run();
1489        NegativeTestCase {
1490            input: "assert error id 42",
1491            expected_error: "expected label, instruction, or end of file",
1492            expected_error_count: 1,
1493            message: "incorrect keyword `error id`",
1494        }
1495        .run();
1496        NegativeTestCase {
1497            input: "error_id 42",
1498            expected_error: "incorrectly placed assertion context",
1499            expected_error_count: 1,
1500            message: "standalone assertion context",
1501        }
1502        .run();
1503        NegativeTestCase {
1504            input: "error_id 42 error_id 42",
1505            expected_error: "incorrectly placed assertion context",
1506            expected_error_count: 2,
1507            message: "multiple standalone assertion contexts",
1508        }
1509        .run();
1510        NegativeTestCase {
1511            input: "nop error_id 42",
1512            expected_error: "incorrectly placed assertion context",
1513            expected_error_count: 1,
1514            message: "error id without assertion",
1515        }
1516        .run();
1517        NegativeTestCase {
1518            input: "assert error_id 42 error_id 1337",
1519            expected_error: "incorrectly placed assertion context",
1520            expected_error_count: 1,
1521            message: "multiple error ids for the same assertion",
1522        }
1523        .run();
1524        NegativeTestCase {
1525            input: "assert_vector error_id 42 error_id 1337",
1526            expected_error: "incorrectly placed assertion context",
1527            expected_error_count: 1,
1528            message: "multiple error ids for the same vector assertion",
1529        }
1530        .run();
1531        NegativeTestCase {
1532            input: "error_id 42 assert",
1533            expected_error: "incorrectly placed assertion context",
1534            expected_error_count: 1,
1535            message: "error id precedes assertion",
1536        }
1537        .run();
1538        NegativeTestCase {
1539            input: "error_id 42 assert error_id 1337",
1540            expected_error: "incorrectly placed assertion context",
1541            expected_error_count: 1,
1542            message: "first error id precedes assertion",
1543        }
1544        .run();
1545    }
1546
1547    #[proptest]
1548    fn assertion_context_error_id_fails_for_invalid_id(
1549        #[strategy(proptest::strategy::Union::new(["assert", "assert_vector"]))]
1550        instruction: String,
1551        #[filter(#id.parse::<i128>().is_err())] id: String,
1552    ) {
1553        // A valid ID followed by a comment is valid, and therefore not relevant
1554        // here. This might be a monkey-patch, but I don't know how to generate
1555        // better input.
1556        if let Some(comment_idx) = id.find("//") {
1557            let (id, _) = id.split_at(comment_idx);
1558            prop_assume!(id.parse::<i128>().is_err());
1559        }
1560
1561        // not using `NegativeTest`: different `id`s trigger different errors
1562        let input = format!("{instruction} error_id {id}");
1563        prop_assert!(parse(&input).is_err());
1564    }
1565
1566    #[proptest]
1567    fn type_hint_to_string_to_type_hint_is_identity(#[strategy(arb())] type_hint: TypeHint) {
1568        let type_hint_string = type_hint.to_string();
1569        let instruction_tokens =
1570            parse(&type_hint_string).map_err(|err| TestCaseError::fail(err.to_string()))?;
1571        let labelled_instructions = to_labelled_instructions(&instruction_tokens);
1572        prop_assert_eq!(1, labelled_instructions.len());
1573        let first_labelled_instruction = labelled_instructions[0].clone();
1574        let_assert!(LabelledInstruction::TypeHint(parsed_type_hint) = first_labelled_instruction);
1575        prop_assert_eq!(type_hint, parsed_type_hint);
1576    }
1577
1578    #[proptest]
1579    fn assertion_context_to_string_to_assertion_context_is_identity(
1580        #[strategy(arb())] context: AssertionContext,
1581    ) {
1582        let assert_with_context = format!("assert {context}");
1583        let instruction_tokens =
1584            parse(&assert_with_context).map_err(|err| TestCaseError::fail(err.to_string()))?;
1585        let labelled_instructions = to_labelled_instructions(&instruction_tokens);
1586        prop_assert_eq!(2, labelled_instructions.len());
1587        let_assert!(LabelledInstruction::AssertionContext(parsed) = &labelled_instructions[1]);
1588        prop_assert_eq!(&context, parsed);
1589    }
1590
1591    #[test]
1592    fn type_hint_type_can_contain_angle_brackets_and_leading_asterisks() {
1593        fn test(expected: Option<&'static str>, input: &'static str) {
1594            dbg!(input);
1595            let input = format!(": {input}");
1596            let parse_result = type_hint_type_name(&input);
1597            let Some(expected) = expected else {
1598                if let Ok((_, Some(ty))) = parse_result {
1599                    panic!("expected error but parsed \"{ty}\"");
1600                }
1601                return assert!(parse_result.is_err());
1602            };
1603            let_assert!(Ok((rest, Some(ty))) = parse_result);
1604            dbg!(rest);
1605            assert!(expected == ty);
1606        }
1607
1608        test(Some("MyType"), "MyType");
1609        test(Some("MyType"), " MyType ");
1610
1611        test(Some("*Pointer"), "*Pointer");
1612        test(Some("*_Pointer"), "*_Pointer");
1613        test(Some("***Pointer"), "***Pointer");
1614        test(Some("*Po"), "*Po*nter");
1615        test(Some("*Po_"), "*Po_*nter");
1616
1617        test(Some("NoGenerics"), "NoGenerics<>");
1618        test(Some("NoGenerics"), "NoGenerics <>");
1619        test(Some("NoGenerics"), "NoGenerics< /* comment */ >");
1620        test(Some("NoGenerics"), "NoGenerics<");
1621        test(Some("NoGenerics"), "NoGenerics< /* comment */");
1622        test(Some("NoGenerics"), "NoGenerics< // comment");
1623
1624        test(Some("OneGeneric<A>"), "OneGeneric<A>");
1625        test(Some("OneGeneric<A>"), "OneGeneric<A >");
1626        test(Some("OneGeneric<A>"), "OneGeneric< A>");
1627        test(Some("OneGeneric<A>"), "OneGeneric< A >");
1628        test(Some("OneGeneric<A>"), "OneGeneric<A,>");
1629        test(Some("OneGeneric<A>"), "OneGeneric<A, /* … */ >");
1630        test(Some("OneGeneric"), "OneGeneric<A,,>");
1631        test(Some("OneGeneric"), "OneGeneric<,A,>");
1632        test(Some("OneGeneric<*A>"), "OneGeneric<*A>");
1633        test(Some("OneGeneric<*A>"), "OneGeneric<*A,>");
1634        test(Some("OneGeneric<*A>"), "OneGeneric<*A, >");
1635        test(Some("OneGeneric<*A>"), "OneGeneric< *A, >");
1636        test(Some("OneGeneric<*A>"), "OneGeneric< *A , >");
1637        test(Some("OneGeneric"), "OneGeneric< *A , , >");
1638
1639        test(Some("TwoGenerics<A, B>"), "TwoGenerics<A, B>");
1640        test(Some("TwoGenerics<A, B>"), "TwoGenerics<A, B >");
1641        test(Some("TwoGenerics<A, B>"), "TwoGenerics< A, B>");
1642        test(Some("TwoGenerics<A, B>"), "TwoGenerics< A, B >");
1643        test(Some("TwoGenerics<A, B>"), "TwoGenerics<A, B,>");
1644        test(Some("TwoGenerics<A, B>"), "TwoGenerics<A, B, /* … */ >");
1645        test(Some("TwoGenerics"), "TwoGenerics<A, B,,>");
1646        test(Some("TwoGenerics"), "TwoGenerics<,A, B,>");
1647        test(Some("TwoGenerics<*A, B>"), "TwoGenerics<*A, B>");
1648        test(Some("TwoGenerics<*A, B>"), "TwoGenerics<*A, B,>");
1649        test(Some("TwoGenerics<A, *B>"), "TwoGenerics<A, *B,>");
1650        test(Some("*TwoGenerics<A, B>"), "*TwoGenerics<A, B,>");
1651
1652        test(Some("Outer<Inner<A>>"), "Outer<Inner<A>>");
1653        test(Some("Outer<Inner<A>>"), "Outer < Inner < A > >");
1654        test(Some("Outer<*Inner<A>>"), "Outer < *Inner < A > >");
1655        test(Some("Outer<*Inner<*A>>"), "Outer < *Inner < *A > >");
1656        test(Some("*Outer<*Inner<*A>>"), "*Outer < *Inner < *A > >");
1657        test(Some("**Outer<**Inner<**A>>"), "**Outer < **Inner < **A > >");
1658
1659        test(None, "");
1660        test(None, "0");
1661        test(None, "*0");
1662        test(None, "*0MyType");
1663    }
1664
1665    #[test]
1666    fn triton_asm_macro() {
1667        let instructions = triton_asm!(write_io 3 push 17 call huh lt swap 3);
1668        assert_eq!(
1669            LabelledInstruction::Instruction(AnInstruction::WriteIo(NumberOfWords::N3)),
1670            instructions[0]
1671        );
1672        assert_eq!(
1673            LabelledInstruction::Instruction(AnInstruction::Push(bfe!(17))),
1674            instructions[1]
1675        );
1676        assert_eq!(
1677            LabelledInstruction::Instruction(AnInstruction::Call("huh".to_string())),
1678            instructions[2]
1679        );
1680        assert_eq!(
1681            LabelledInstruction::Instruction(AnInstruction::Lt),
1682            instructions[3]
1683        );
1684        assert_eq!(
1685            LabelledInstruction::Instruction(AnInstruction::Swap(OpStackElement::ST3)),
1686            instructions[4]
1687        );
1688    }
1689
1690    #[test]
1691    fn triton_asm_macro_with_a_single_return() {
1692        let instructions = triton_asm!(return);
1693        assert_eq!(
1694            LabelledInstruction::Instruction(AnInstruction::Return),
1695            instructions[0]
1696        );
1697    }
1698
1699    #[test]
1700    fn triton_asm_macro_with_a_single_assert() {
1701        let instructions = triton_asm!(assert);
1702        assert_eq!(
1703            LabelledInstruction::Instruction(AnInstruction::Assert),
1704            instructions[0]
1705        );
1706    }
1707
1708    #[test]
1709    fn triton_asm_macro_with_only_assert_and_return() {
1710        let instructions = triton_asm!(assert return);
1711        assert_eq!(
1712            LabelledInstruction::Instruction(AnInstruction::Assert),
1713            instructions[0]
1714        );
1715        assert_eq!(
1716            LabelledInstruction::Instruction(AnInstruction::Return),
1717            instructions[1]
1718        );
1719    }
1720
1721    #[test]
1722    fn triton_program_macro() {
1723        let program = triton_program!(
1724            // main entry point
1725            call routine_1
1726            call routine_2 // inline comment
1727            halt
1728
1729            // single line content regarding subroutine 1
1730            routine_1:
1731                /*
1732                 * multi-line comment
1733                 */
1734                call routine_3
1735                return
1736
1737            // subroutine 2 starts here
1738            routine_2:
1739                push 18 push 7 add
1740                push 25 eq assert
1741                return
1742
1743            routine_3:
1744            alternative_label:
1745                nop nop
1746                return
1747        );
1748        println!("{program}");
1749    }
1750
1751    #[test]
1752    fn triton_program_macro_interpolates_various_types() {
1753        let push_arg = rand::rng().random_range(0..BFieldElement::P);
1754        let instruction_push =
1755            LabelledInstruction::Instruction(AnInstruction::Push(push_arg.into()));
1756        let swap_argument = "1";
1757        triton_program!({instruction_push} push {push_arg} swap {swap_argument} eq assert halt);
1758    }
1759
1760    #[test]
1761    fn triton_instruction_macro_parses_simple_instructions() {
1762        assert_eq!(
1763            LabelledInstruction::Instruction(AnInstruction::Halt),
1764            triton_instr!(halt)
1765        );
1766        assert_eq!(
1767            LabelledInstruction::Instruction(AnInstruction::Add),
1768            triton_instr!(add)
1769        );
1770        assert_eq!(
1771            LabelledInstruction::Instruction(AnInstruction::Pop(NumberOfWords::N3)),
1772            triton_instr!(pop 3)
1773        );
1774    }
1775
1776    #[test]
1777    #[should_panic(expected = "not_an_instruction")]
1778    fn triton_instruction_macro_fails_when_encountering_unknown_instruction() {
1779        triton_instr!(not_an_instruction);
1780    }
1781
1782    #[test]
1783    fn triton_instruction_macro_parses_instructions_with_argument() {
1784        assert_eq!(
1785            LabelledInstruction::Instruction(AnInstruction::Push(bfe!(7))),
1786            triton_instr!(push 7)
1787        );
1788        assert_eq!(
1789            LabelledInstruction::Instruction(AnInstruction::Dup(OpStackElement::ST3)),
1790            triton_instr!(dup 3)
1791        );
1792        assert_eq!(
1793            LabelledInstruction::Instruction(AnInstruction::Swap(OpStackElement::ST5)),
1794            triton_instr!(swap 5)
1795        );
1796        assert_eq!(
1797            LabelledInstruction::Instruction(AnInstruction::Call("my_label".to_string())),
1798            triton_instr!(call my_label)
1799        );
1800    }
1801
1802    #[test]
1803    fn triton_asm_macro_can_repeat_instructions() {
1804        let instructions = triton_asm![push 42; 3];
1805        let expected_instructions =
1806            vec![LabelledInstruction::Instruction(AnInstruction::Push(bfe!(42))); 3];
1807        assert_eq!(expected_instructions, instructions);
1808
1809        let instructions = triton_asm![read_io 2; 15];
1810        let expected_instructions =
1811            vec![LabelledInstruction::Instruction(AnInstruction::ReadIo(NumberOfWords::N2)); 15];
1812        assert_eq!(expected_instructions, instructions);
1813
1814        let instructions = triton_asm![divine 3; Digest::LEN];
1815        let expected_instructions =
1816            vec![
1817                LabelledInstruction::Instruction(AnInstruction::Divine(NumberOfWords::N3));
1818                Digest::LEN
1819            ];
1820        assert_eq!(expected_instructions, instructions);
1821    }
1822
1823    #[test]
1824    fn break_gets_turned_into_labelled_instruction() {
1825        let instructions = triton_asm![break];
1826        let expected_instructions = vec![LabelledInstruction::Breakpoint];
1827        assert_eq!(expected_instructions, instructions);
1828    }
1829
1830    #[test]
1831    fn break_does_not_propagate_to_full_program() {
1832        let program = triton_program! { break halt break };
1833        assert_eq!(1, program.len_bwords());
1834    }
1835}
triton_isa/parser.rs

triton_isa/
parser.rs