triton_isa/
parser.rs

1use std::collections::hash_map::Entry;
2use std::collections::HashMap;
3use std::collections::HashSet;
4use std::error::Error;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::fmt::Result as FmtResult;
8
9use nom::branch::alt;
10use nom::bytes::complete::tag;
11use nom::bytes::complete::take_while;
12use nom::bytes::complete::take_while1;
13use nom::character::complete::digit1;
14use nom::combinator::cut;
15use nom::combinator::eof;
16use nom::combinator::fail;
17use nom::combinator::opt;
18use nom::error::ErrorKind;
19use nom::error::VerboseError;
20use nom::error::VerboseErrorKind;
21use nom::multi::many0;
22use nom::multi::many1;
23use nom::Finish;
24use nom::IResult;
25use twenty_first::bfe;
26use twenty_first::prelude::BFieldElement;
27
28use crate::instruction::AnInstruction;
29use crate::instruction::AssertionContext;
30use crate::instruction::Instruction;
31use crate::instruction::LabelledInstruction;
32use crate::instruction::TypeHint;
33use crate::instruction::ALL_INSTRUCTION_NAMES;
34use crate::op_stack::NumberOfWords;
35use crate::op_stack::OpStackElement;
36
37const KEYWORDS: [&str; 3] = [
38    "hint",
39    "error_id",
40    "error_message", // reserved for future use
41];
42
43#[derive(Debug, PartialEq)]
44pub struct ParseError<'a> {
45    pub input: &'a str,
46    pub errors: VerboseError<&'a str>,
47}
48
49/// An intermediate object for the parsing / compilation pipeline. You probably want
50/// [`LabelledInstruction`].
51#[derive(Debug, Clone, Eq, PartialEq, Hash)]
52pub enum InstructionToken<'a> {
53    Instruction(AnInstruction<String>, &'a str),
54    Label(String, &'a str),
55    Breakpoint(&'a str),
56    TypeHint(TypeHint, &'a str),
57    AssertionContext(AssertionContext, &'a str),
58}
59
60impl Display for ParseError<'_> {
61    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
62        let mut errors = self.errors.clone();
63        if matches!(
64            errors.errors.first(),
65            Some((_, VerboseErrorKind::Nom(ErrorKind::Fail | ErrorKind::Eof)))
66        ) {
67            errors.errors.remove(0);
68        }
69
70        write!(f, "{}", nom::error::convert_error(self.input, errors))
71    }
72}
73
74impl Error for ParseError<'_> {}
75
76impl<'a> InstructionToken<'a> {
77    pub fn token_str(&self) -> &'a str {
78        match self {
79            Self::Instruction(_, token_str) => token_str,
80            Self::Label(_, token_str) => token_str,
81            Self::Breakpoint(token_str) => token_str,
82            Self::TypeHint(_, token_str) => token_str,
83            Self::AssertionContext(_, token_str) => token_str,
84        }
85    }
86
87    pub fn to_labelled_instruction(&self) -> LabelledInstruction {
88        match self {
89            Self::Instruction(instr, _) => LabelledInstruction::Instruction(instr.to_owned()),
90            Self::Label(label, _) => LabelledInstruction::Label(label.to_owned()),
91            Self::Breakpoint(_) => LabelledInstruction::Breakpoint,
92            Self::TypeHint(type_hint, _) => LabelledInstruction::TypeHint(type_hint.to_owned()),
93            Self::AssertionContext(ctx, _) => LabelledInstruction::AssertionContext(ctx.to_owned()),
94        }
95    }
96}
97
98pub fn to_labelled_instructions(instructions: &[InstructionToken]) -> Vec<LabelledInstruction> {
99    instructions
100        .iter()
101        .map(|instruction| instruction.to_labelled_instruction())
102        .collect()
103}
104
105/// Parse a program
106pub(crate) fn parse(input: &str) -> Result<Vec<InstructionToken>, ParseError> {
107    let (_, instructions) = tokenize(input)
108        .finish()
109        .map_err(|errors| ParseError { input, errors })?;
110
111    ensure_no_missing_or_duplicate_labels(input, &instructions)?;
112    ensure_assertion_context_is_matched_with_assertion(input, &instructions)?;
113
114    Ok(instructions)
115}
116
117fn ensure_no_missing_or_duplicate_labels<'a>(
118    input: &'a str,
119    instructions: &[InstructionToken<'a>],
120) -> Result<(), ParseError<'a>> {
121    // identify all and duplicate labels
122    let mut seen_labels = HashMap::<_, InstructionToken>::default();
123    let mut duplicate_labels = HashSet::default();
124    for instruction in instructions {
125        if let InstructionToken::Label(label, _) = instruction {
126            if let Some(first_occurrence) = seen_labels.get(label.as_str()) {
127                duplicate_labels.insert(first_occurrence.to_owned());
128                duplicate_labels.insert(instruction.to_owned());
129            } else {
130                seen_labels.insert(label.as_str(), instruction.to_owned());
131            }
132        }
133    }
134
135    let missing_labels = identify_missing_labels(instructions, seen_labels);
136
137    if duplicate_labels.is_empty() && missing_labels.is_empty() {
138        return Ok(());
139    }
140
141    let errors = errors_for_duplicate_and_missing_labels(duplicate_labels, missing_labels);
142    Err(ParseError { input, errors })
143}
144
145fn identify_missing_labels<'a>(
146    instructions: &[InstructionToken<'a>],
147    seen_labels: HashMap<&str, InstructionToken>,
148) -> HashSet<InstructionToken<'a>> {
149    let mut missing_labels = HashSet::default();
150    for instruction in instructions {
151        if let InstructionToken::Instruction(AnInstruction::Call(label), _) = instruction {
152            if !seen_labels.contains_key(label.as_str()) {
153                missing_labels.insert(instruction.to_owned());
154            }
155        }
156    }
157    missing_labels
158}
159
160fn errors_for_duplicate_and_missing_labels<'a>(
161    duplicate_labels: HashSet<InstructionToken<'a>>,
162    missing_labels: HashSet<InstructionToken<'a>>,
163) -> VerboseError<&'a str> {
164    let duplicate_label_error_context = VerboseErrorKind::Context("duplicate label");
165    let missing_label_error_context = VerboseErrorKind::Context("missing label");
166
167    let duplicate_label_errors =
168        errors_for_labels_with_context(duplicate_labels, duplicate_label_error_context);
169    let missing_label_errors =
170        errors_for_labels_with_context(missing_labels, missing_label_error_context);
171
172    let errors = [duplicate_label_errors, missing_label_errors].concat();
173    VerboseError { errors }
174}
175
176fn errors_for_labels_with_context(
177    labels: HashSet<InstructionToken>,
178    context: VerboseErrorKind,
179) -> Vec<(&str, VerboseErrorKind)> {
180    labels
181        .into_iter()
182        .map(|label| (label.token_str(), context.clone()))
183        .collect()
184}
185
186fn ensure_assertion_context_is_matched_with_assertion<'a>(
187    input: &'a str,
188    instructions: &[InstructionToken<'a>],
189) -> Result<(), ParseError<'a>> {
190    type IT<'a> = InstructionToken<'a>;
191
192    let mut accept_id = false;
193    let mut incorrectly_placed_contexts = HashSet::new();
194
195    for instruction in instructions {
196        match instruction {
197            IT::Instruction(AnInstruction::Assert | AnInstruction::AssertVector, _) => {
198                accept_id = true;
199            }
200            IT::AssertionContext(AssertionContext::ID(_), _) => {
201                if !accept_id {
202                    incorrectly_placed_contexts.insert(instruction.clone());
203                }
204                accept_id = false;
205            }
206
207            // any assertion context must immediately follow an `assert` or `assert_vector`
208            _ => accept_id = false,
209        }
210    }
211
212    let parser_context = VerboseErrorKind::Context("incorrectly placed assertion context");
213    let errors = errors_for_labels_with_context(incorrectly_placed_contexts, parser_context);
214
215    if errors.is_empty() {
216        Ok(())
217    } else {
218        let errors = VerboseError { errors };
219        Err(ParseError { input, errors })
220    }
221}
222
223/// Auxiliary type alias: `IResult` defaults to `nom::error::Error` as concrete
224/// error type, but we want `nom::error::VerboseError` as it allows `context()`.
225type ParseResult<'input, Out> = IResult<&'input str, Out, VerboseError<&'input str>>;
226
227pub fn tokenize(s: &str) -> ParseResult<Vec<InstructionToken>> {
228    let (s, _) = comment_or_whitespace0(s)?;
229    let (s, instructions) = many0(alt((
230        label,
231        labelled_instruction,
232        breakpoint,
233        type_hint,
234        assertion_context,
235    )))(s)?;
236    let (s, _) = nom::error::context("expecting label, instruction or eof", eof)(s)?;
237
238    Ok((s, instructions))
239}
240
241fn label(label_s: &str) -> ParseResult<InstructionToken> {
242    let (s, addr) = label_addr(label_s)?;
243    let (s, _) = whitespace0(s)?; // whitespace between label and ':' is allowed
244    let (s, _) = token0(":")(s)?; // don't require space after ':'
245
246    // Checking if `<label>:` is an instruction must happen after parsing `:`, since
247    // otherwise `cut` will reject the alternative parsers of `label`, like
248    // `labelled_instruction`, where valid instructions names *are* allowed.
249    if is_illegal_label(&addr) {
250        let failure_reason = "label must be neither instruction nor keyword";
251        return cut(nom::error::context(failure_reason, fail))(label_s);
252    }
253
254    Ok((s, InstructionToken::Label(addr, label_s)))
255}
256
257fn breakpoint(breakpoint_s: &str) -> ParseResult<InstructionToken> {
258    let (s, _) = token1("break")(breakpoint_s)?;
259    Ok((s, InstructionToken::Breakpoint(breakpoint_s)))
260}
261
262fn labelled_instruction(s_instr: &str) -> ParseResult<InstructionToken> {
263    let (s, instr) = an_instruction(s_instr)?;
264    Ok((s, InstructionToken::Instruction(instr, s_instr)))
265}
266
267fn an_instruction(s: &str) -> ParseResult<AnInstruction<String>> {
268    // OpStack manipulation
269    let pop = pop_instruction();
270    let push = push_instruction();
271    let divine = divine_instruction();
272    let pick = pick_instruction();
273    let place = place_instruction();
274    let dup = dup_instruction();
275    let swap = swap_instruction();
276
277    let opstack_manipulation = alt((pop, push, divine, pick, place, dup, swap));
278
279    // Control flow
280    let halt = instruction("halt", AnInstruction::Halt);
281    let nop = instruction("nop", AnInstruction::Nop);
282    let skiz = instruction("skiz", AnInstruction::Skiz);
283    let call = call_instruction();
284    let return_ = instruction("return", AnInstruction::Return);
285    let recurse = instruction("recurse", AnInstruction::Recurse);
286    let recurse_or_return = instruction("recurse_or_return", AnInstruction::RecurseOrReturn);
287    let assert = instruction("assert", AnInstruction::Assert);
288
289    let control_flow = alt((nop, skiz, call, return_, halt));
290
291    // Memory access
292    let read_mem = read_mem_instruction();
293    let write_mem = write_mem_instruction();
294
295    let memory_access = alt((read_mem, write_mem));
296
297    // Hashing-related instructions
298    let hash = instruction("hash", AnInstruction::Hash);
299    let assert_vector = instruction("assert_vector", AnInstruction::AssertVector);
300    let sponge_init = instruction("sponge_init", AnInstruction::SpongeInit);
301    let sponge_absorb = instruction("sponge_absorb", AnInstruction::SpongeAbsorb);
302    let sponge_absorb_mem = instruction("sponge_absorb_mem", AnInstruction::SpongeAbsorbMem);
303    let sponge_squeeze = instruction("sponge_squeeze", AnInstruction::SpongeSqueeze);
304
305    let hashing_related = alt((hash, sponge_init, sponge_squeeze));
306
307    // Arithmetic on stack instructions
308    let add = instruction("add", AnInstruction::Add);
309    let addi = addi_instruction();
310    let mul = instruction("mul", AnInstruction::Mul);
311    let invert = instruction("invert", AnInstruction::Invert);
312    let eq = instruction("eq", AnInstruction::Eq);
313    let split = instruction("split", AnInstruction::Split);
314    let lt = instruction("lt", AnInstruction::Lt);
315    let and = instruction("and", AnInstruction::And);
316    let xor = instruction("xor", AnInstruction::Xor);
317    let log_2_floor = instruction("log_2_floor", AnInstruction::Log2Floor);
318    let pow = instruction("pow", AnInstruction::Pow);
319    let div_mod = instruction("div_mod", AnInstruction::DivMod);
320    let pop_count = instruction("pop_count", AnInstruction::PopCount);
321    let xx_add = instruction("xx_add", AnInstruction::XxAdd);
322    let xx_mul = instruction("xx_mul", AnInstruction::XxMul);
323    let x_invert = instruction("x_invert", AnInstruction::XInvert);
324    let xb_mul = instruction("xb_mul", AnInstruction::XbMul);
325
326    let base_field_arithmetic_on_stack = alt((mul, invert, eq));
327    let bitwise_arithmetic_on_stack =
328        alt((split, lt, and, xor, log_2_floor, pow, div_mod, pop_count));
329    let extension_field_arithmetic_on_stack = alt((xx_add, xx_mul, x_invert, xb_mul));
330    let arithmetic_on_stack = alt((
331        base_field_arithmetic_on_stack,
332        bitwise_arithmetic_on_stack,
333        extension_field_arithmetic_on_stack,
334    ));
335
336    // Read/write
337    let read_io = read_io_instruction();
338    let write_io = write_io_instruction();
339
340    let read_write = alt((read_io, write_io));
341
342    // Many-in-One
343    let merkle_step = instruction("merkle_step", AnInstruction::MerkleStep);
344    let merkle_step_mem = instruction("merkle_step_mem", AnInstruction::MerkleStepMem);
345    let xx_dot_step = instruction("xx_dot_step", AnInstruction::XxDotStep);
346    let xb_dot_step = instruction("xb_dot_step", AnInstruction::XbDotStep);
347
348    let many_to_one = alt((xx_dot_step, xb_dot_step));
349
350    // Because of common prefixes, the following parsers are sensitive to order.
351    // Successfully parsing "assert" before trying "assert_vector" can lead to
352    // picking the wrong one. By trying them in the order of longest first, less
353    // backtracking is necessary.
354    let syntax_ambiguous = alt((
355        recurse_or_return,
356        recurse,
357        assert_vector,
358        assert,
359        sponge_absorb_mem,
360        sponge_absorb,
361        addi,
362        add,
363        merkle_step_mem,
364        merkle_step,
365    ));
366
367    alt((
368        opstack_manipulation,
369        control_flow,
370        memory_access,
371        hashing_related,
372        arithmetic_on_stack,
373        read_write,
374        many_to_one,
375        syntax_ambiguous,
376    ))(s)
377}
378
379fn is_illegal_label(s: &str) -> bool {
380    ALL_INSTRUCTION_NAMES.contains(&s) || KEYWORDS.contains(&s)
381}
382
383fn instruction<'a>(
384    name: &'a str,
385    instruction: AnInstruction<String>,
386) -> impl Fn(&'a str) -> ParseResult<'a, AnInstruction<String>> {
387    move |s: &'a str| {
388        let (s, _) = token1(name)(s)?;
389        Ok((s, instruction.clone()))
390    }
391}
392
393fn pop_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
394    move |s: &str| {
395        let (s, _) = token1("pop")(s)?;
396        let (s, arg) = number_of_words(s)?;
397        Ok((s, AnInstruction::Pop(arg)))
398    }
399}
400
401fn push_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
402    move |s: &str| {
403        let (s, _) = token1("push")(s)?;
404        let (s, elem) = field_element(s)?;
405        Ok((s, AnInstruction::Push(elem)))
406    }
407}
408
409fn addi_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
410    move |s: &str| {
411        let (s, _) = token1("addi")(s)?;
412        let (s, elem) = field_element(s)?;
413        Ok((s, AnInstruction::AddI(elem)))
414    }
415}
416
417fn divine_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
418    move |s: &str| {
419        let (s, _) = token1("divine")(s)?;
420        let (s, arg) = number_of_words(s)?;
421        Ok((s, AnInstruction::Divine(arg)))
422    }
423}
424
425fn pick_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
426    move |s: &str| {
427        let (s, _) = token1("pick")(s)?;
428        let (s, arg) = stack_register(s)?;
429        Ok((s, AnInstruction::Pick(arg)))
430    }
431}
432
433fn place_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
434    move |s: &str| {
435        let (s, _) = token1("place")(s)?;
436        let (s, arg) = stack_register(s)?;
437        Ok((s, AnInstruction::Place(arg)))
438    }
439}
440
441fn dup_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
442    move |s: &str| {
443        let (s, _) = token1("dup")(s)?; // require space before argument
444        let (s, stack_register) = stack_register(s)?;
445        Ok((s, AnInstruction::Dup(stack_register)))
446    }
447}
448
449fn swap_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
450    move |s: &str| {
451        let (s, _) = token1("swap")(s)?;
452        let (s, stack_register) = stack_register(s)?;
453        Ok((s, AnInstruction::Swap(stack_register)))
454    }
455}
456
457fn call_instruction<'a>() -> impl Fn(&'a str) -> ParseResult<'a, AnInstruction<String>> {
458    move |s: &'a str| {
459        let (s, _) = token1("call")(s)?;
460        let (s, label) = label_addr(s)?;
461        let (s, _) = comment_or_whitespace1(s)?;
462
463        // This check cannot be moved into `label_addr`, since `label_addr` is shared
464        // between the scenarios `<label>:` and `call <label>`; the former requires
465        // parsing the `:` before rejecting a possible instruction name in the label.
466        if is_illegal_label(&label) {
467            let failure_reason = "label must be neither instruction nor keyword";
468            return cut(nom::error::context(failure_reason, fail))(s);
469        }
470
471        Ok((s, AnInstruction::Call(label)))
472    }
473}
474
475fn read_mem_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
476    move |s: &str| {
477        let (s, _) = token1("read_mem")(s)?;
478        let (s, arg) = number_of_words(s)?;
479        Ok((s, AnInstruction::ReadMem(arg)))
480    }
481}
482
483fn write_mem_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
484    move |s: &str| {
485        let (s, _) = token1("write_mem")(s)?;
486        let (s, arg) = number_of_words(s)?;
487        Ok((s, AnInstruction::WriteMem(arg)))
488    }
489}
490
491fn read_io_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
492    move |s: &str| {
493        let (s, _) = token1("read_io")(s)?;
494        let (s, arg) = number_of_words(s)?;
495        Ok((s, AnInstruction::ReadIo(arg)))
496    }
497}
498
499fn write_io_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
500    move |s: &str| {
501        let (s, _) = token1("write_io")(s)?;
502        let (s, arg) = number_of_words(s)?;
503        Ok((s, AnInstruction::WriteIo(arg)))
504    }
505}
506
507fn field_element(s_orig: &str) -> ParseResult<BFieldElement> {
508    let (s, negative) = opt(token0("-"))(s_orig)?;
509    let (s, n) = digit1(s)?;
510    let (s, _) = comment_or_whitespace1(s)?;
511
512    let Ok(mut n): Result<i128, _> = n.parse() else {
513        return nom::error::context("out-of-bounds constant", fail)(s);
514    };
515
516    let quotient = i128::from(BFieldElement::P);
517    if n >= quotient {
518        return nom::error::context("out-of-bounds constant", fail)(s_orig);
519    }
520
521    if negative.is_some() {
522        n *= -1;
523        n += quotient;
524    }
525
526    Ok((s, BFieldElement::new(n as u64)))
527}
528
529fn stack_register(s: &str) -> ParseResult<OpStackElement> {
530    let (s, n) = digit1(s)?;
531    let stack_register = match n {
532        "0" => OpStackElement::ST0,
533        "1" => OpStackElement::ST1,
534        "2" => OpStackElement::ST2,
535        "3" => OpStackElement::ST3,
536        "4" => OpStackElement::ST4,
537        "5" => OpStackElement::ST5,
538        "6" => OpStackElement::ST6,
539        "7" => OpStackElement::ST7,
540        "8" => OpStackElement::ST8,
541        "9" => OpStackElement::ST9,
542        "10" => OpStackElement::ST10,
543        "11" => OpStackElement::ST11,
544        "12" => OpStackElement::ST12,
545        "13" => OpStackElement::ST13,
546        "14" => OpStackElement::ST14,
547        "15" => OpStackElement::ST15,
548        _ => {
549            let failure_reason = "using an out-of-bounds stack register (0-15 exist)";
550            return nom::error::context(failure_reason, fail)(s);
551        }
552    };
553    let (s, _) = comment_or_whitespace1(s)?;
554
555    Ok((s, stack_register))
556}
557
558fn number_of_words(s: &str) -> ParseResult<NumberOfWords> {
559    let (s, n) = digit1(s)?;
560    let arg = match n {
561        "1" => NumberOfWords::N1,
562        "2" => NumberOfWords::N2,
563        "3" => NumberOfWords::N3,
564        "4" => NumberOfWords::N4,
565        "5" => NumberOfWords::N5,
566        _ => return nom::error::context("using an out-of-bounds argument (1-5 allowed)", fail)(s),
567    };
568    let (s, _) = comment_or_whitespace1(s)?; // require space after element
569
570    Ok((s, arg))
571}
572
573/// Parse a label address. This is used in "`<label>:`" and in "`call <label>`".
574fn label_addr(s_orig: &str) -> ParseResult<String> {
575    let (s, addr_part_0) = take_while1(is_label_start_char)(s_orig)?;
576    if addr_part_0.is_empty() {
577        // todo: this error is never shown to the user, since the `label` parser is wrapped in an
578        //  `alt`. With a custom error type, it is possible to have alt return the error of the
579        //  parser that went the farthest in the input data.
580        let failure_reason = "label must start with an alphabetic character or underscore";
581        return nom::error::context(failure_reason, fail)(s_orig);
582    }
583    let (s, addr_part_1) = take_while(is_label_char)(s)?;
584
585    Ok((s, format!("{addr_part_0}{addr_part_1}")))
586}
587
588fn is_label_start_char(c: char) -> bool {
589    c.is_alphabetic() || c == '_'
590}
591
592fn is_label_char(c: char) -> bool {
593    is_label_start_char(c) || c.is_numeric() || c == '-'
594}
595
596/// Parse 0 or more comments and/or whitespace.
597///
598/// This is used after places where whitespace is optional (e.g. after ':').
599fn comment_or_whitespace0(s: &str) -> ParseResult<&str> {
600    let (s, _) = many0(alt((comment1, whitespace1)))(s)?;
601    Ok((s, ""))
602}
603
604/// Parse at least one comment and/or whitespace, or eof.
605///
606/// This is used after places where whitespace is mandatory (e.g. after tokens).
607fn comment_or_whitespace1<'a>(s: &'a str) -> ParseResult<'a, &'a str> {
608    let cws1 = |s: &'a str| -> ParseResult<&str> {
609        let (s, _) = many1(alt((comment1, whitespace1)))(s)?;
610        Ok((s, ""))
611    };
612    alt((eof, cws1))(s)
613}
614
615/// Parse one comment (not including the linebreak)
616fn comment1(s: &str) -> ParseResult<()> {
617    let (s, _) = tag("//")(s)?;
618    let (s, _) = take_while(|c| !is_linebreak(c))(s)?;
619    Ok((s, ()))
620}
621
622/// Parse whitespace characters (can be none)
623fn whitespace0(s: &str) -> ParseResult<()> {
624    let (s, _) = take_while(|c: char| c.is_whitespace())(s)?;
625    Ok((s, ()))
626}
627
628/// Parse at least one whitespace character
629fn whitespace1(s: &str) -> ParseResult<()> {
630    let (s, _) = take_while1(|c: char| c.is_whitespace())(s)?;
631    Ok((s, ()))
632}
633
634fn is_linebreak(c: char) -> bool {
635    c == '\r' || c == '\n'
636}
637
638/// `token0(tok)` will parse the string `tok` and munch 0 or more comment or whitespace.
639fn token0<'a>(token: &'a str) -> impl Fn(&'a str) -> ParseResult<'a, ()> {
640    move |s: &'a str| {
641        let (s, _) = tag(token)(s)?;
642        let (s, _) = comment_or_whitespace0(s)?;
643        Ok((s, ()))
644    }
645}
646
647/// `token1(tok)` will parse the string `tok` and munch at least one comment and/or whitespace,
648/// or eof.
649fn token1<'a>(token: &'a str) -> impl Fn(&'a str) -> ParseResult<'a, ()> {
650    move |s: &'a str| {
651        let (s, _) = tag(token)(s)?;
652        let (s, _) = comment_or_whitespace1(s)?;
653        Ok((s, ()))
654    }
655}
656
657/// Parse one type hint.
658///
659/// Type hints look like this:
660///
661/// ```text
662/// hint <variable_name>[: <type_name>] = stack\[<range_start>[..<range_end>]\]
663/// ```
664fn type_hint(s_type_hint: &str) -> ParseResult<InstructionToken> {
665    let (s, _) = token1("hint")(s_type_hint)?;
666    let (s, variable_name) = type_hint_variable_name(s)?;
667    let (s, type_name) = type_hint_type_name(s)?;
668    let (s, range_start) = type_hint_starting_index(s)?;
669    let (s, maybe_range_end) = type_hint_ending_index(s)?;
670    let (s, _) = whitespace0(s)?;
671    let (s, _) = token0("]")(s)?;
672
673    let length = match maybe_range_end {
674        Some(range_end) if range_end <= range_start => {
675            let failure_reason = "range end must be greater than range start";
676            return cut(nom::error::context(failure_reason, fail))(s);
677        }
678        Some(range_end) => range_end - range_start,
679        None => 1,
680    };
681
682    let type_hint = TypeHint {
683        starting_index: range_start,
684        length,
685        type_name,
686        variable_name,
687    };
688
689    Ok((s, InstructionToken::TypeHint(type_hint, s_type_hint)))
690}
691
692fn type_hint_variable_name(s: &str) -> ParseResult<String> {
693    let (s, variable_name_start) = take_while1(is_type_hint_variable_name_start_character)(s)?;
694    let (s, variable_name_end) = take_while(is_type_hint_variable_name_character)(s)?;
695    let (s, _) = whitespace0(s)?;
696    let variable_name = format!("{variable_name_start}{variable_name_end}");
697    Ok((s, variable_name))
698}
699
700fn type_hint_type_name(s: &str) -> ParseResult<Option<String>> {
701    let Ok((s, _)) = token0(":")(s) else {
702        return Ok((s, None));
703    };
704
705    let (s, type_name_start) = take_while1(is_type_hint_type_name_start_character)(s)?;
706    let (s, type_name_end) = take_while(is_type_hint_type_name_character)(s)?;
707    let (s, _) = whitespace0(s)?;
708    let type_name = format!("{type_name_start}{type_name_end}");
709
710    Ok((s, Some(type_name)))
711}
712
713fn type_hint_ending_index(s: &str) -> ParseResult<Option<usize>> {
714    let Ok((s, _)) = token0("..")(s) else {
715        return Ok((s, None));
716    };
717
718    let (s, range_end) = take_while(|c: char| c.is_numeric())(s)?;
719    let (_, range_end) = parse_str_to_usize(range_end)?;
720
721    Ok((s, Some(range_end)))
722}
723
724fn type_hint_starting_index(s: &str) -> ParseResult<usize> {
725    let (s, _) = token0("=")(s)?;
726    let (s, _) = token0("stack[")(s)?;
727    let (s, range_start) = take_while(|c: char| c.is_numeric())(s)?;
728    let (_, range_start) = parse_str_to_usize(range_start)?;
729    let (s, _) = whitespace0(s)?;
730    Ok((s, range_start))
731}
732
733fn is_type_hint_variable_name_start_character(c: char) -> bool {
734    (c.is_alphabetic() && c.is_lowercase()) || c == '_'
735}
736
737fn is_type_hint_variable_name_character(c: char) -> bool {
738    is_type_hint_variable_name_start_character(c) || c.is_numeric()
739}
740
741fn is_type_hint_type_name_start_character(c: char) -> bool {
742    c.is_alphabetic()
743}
744
745fn is_type_hint_type_name_character(c: char) -> bool {
746    is_type_hint_type_name_start_character(c) || c.is_numeric() || c == '_'
747}
748
749fn parse_str_to_usize(s: &str) -> ParseResult<usize> {
750    match s.parse::<usize>() {
751        Ok(u) => Ok((s, u)),
752        Err(_) => cut(nom::error::context("integer conversion failure", fail))(s),
753    }
754}
755
756fn assertion_context(s_ctx: &str) -> ParseResult<InstructionToken> {
757    let (s, assertion_context) = assertion_context_id(s_ctx)?;
758    let assertion_context = InstructionToken::AssertionContext(assertion_context, s_ctx);
759
760    Ok((s, assertion_context))
761}
762
763fn assertion_context_id(s_ctx: &str) -> ParseResult<AssertionContext> {
764    let (s, _) = token1("error_id")(s_ctx)?;
765    let (s, id) = nom::character::complete::i128(s)?;
766    let (s, _) = comment_or_whitespace1(s)?;
767
768    let assertion_context = AssertionContext::ID(id);
769    Ok((s, assertion_context))
770}
771
772pub(crate) fn build_label_to_address_map(program: &[LabelledInstruction]) -> HashMap<String, u64> {
773    let mut label_map = HashMap::new();
774    let mut instruction_pointer = 0;
775
776    for labelled_instruction in program {
777        if let LabelledInstruction::Instruction(instruction) = labelled_instruction {
778            instruction_pointer += instruction.size() as u64;
779            continue;
780        }
781
782        let LabelledInstruction::Label(label) = labelled_instruction else {
783            continue;
784        };
785        let Entry::Vacant(new_label_map_entry) = label_map.entry(label.clone()) else {
786            panic!("Duplicate label: {label}");
787        };
788        new_label_map_entry.insert(instruction_pointer);
789    }
790
791    label_map
792}
793
794pub(crate) fn turn_labels_into_addresses(
795    labelled_instructions: &[LabelledInstruction],
796    label_to_address: &HashMap<String, u64>,
797) -> Vec<Instruction> {
798    fn turn_label_to_address_for_instruction(
799        labelled_instruction: &LabelledInstruction,
800        label_map: &HashMap<String, u64>,
801    ) -> Option<Instruction> {
802        let LabelledInstruction::Instruction(instruction) = labelled_instruction else {
803            return None;
804        };
805
806        let instruction_with_absolute_address =
807            instruction.map_call_address(|label| address_for_label(label, label_map));
808        Some(instruction_with_absolute_address)
809    }
810
811    fn address_for_label(label: &str, label_map: &HashMap<String, u64>) -> BFieldElement {
812        let maybe_address = label_map.get(label).map(|&a| bfe!(a));
813        maybe_address.unwrap_or_else(|| panic!("Label not found: {label}"))
814    }
815
816    labelled_instructions
817        .iter()
818        .filter_map(|inst| turn_label_to_address_for_instruction(inst, label_to_address))
819        .flat_map(|inst| vec![inst; inst.size()])
820        .collect()
821}
822
823#[cfg(test)]
824pub(crate) mod tests {
825    use assert2::assert;
826    use assert2::let_assert;
827    use itertools::Itertools;
828    use proptest::prelude::*;
829    use proptest_arbitrary_interop::arb;
830    use rand::Rng;
831    use strum::EnumCount;
832    use test_strategy::proptest;
833    use test_strategy::Arbitrary;
834    use twenty_first::bfe;
835    use twenty_first::prelude::Digest;
836
837    use crate::triton_asm;
838    use crate::triton_instr;
839    use crate::triton_program;
840
841    use super::*;
842
843    #[must_use]
844    struct TestCase<'a> {
845        input: &'a str,
846        expected: Vec<Instruction>,
847        message: &'static str,
848    }
849
850    #[must_use]
851    struct NegativeTestCase<'a> {
852        input: &'a str,
853        expected_error: &'static str,
854        expected_error_count: usize,
855        message: &'static str,
856    }
857
858    impl TestCase<'_> {
859        fn run(&self) {
860            let message = self.message;
861            let parse_result = parse(self.input).map_err(|err| format!("{message}:\n{err}"));
862            let_assert!(Ok(actual) = parse_result);
863
864            let labelled_instructions = to_labelled_instructions(&actual);
865            let label_to_address = build_label_to_address_map(&labelled_instructions);
866            let instructions =
867                turn_labels_into_addresses(&labelled_instructions, &label_to_address);
868            assert!(self.expected == instructions, "{message}");
869        }
870    }
871
872    impl NegativeTestCase<'_> {
873        fn run(self) {
874            let Self {
875                input,
876                expected_error,
877                expected_error_count,
878                message,
879            } = self;
880
881            let parse_result = parse(input);
882            if let Ok(instructions) = parse_result {
883                eprintln!("parser input: {input}");
884                eprintln!("parser output: {instructions:?}");
885                panic!("parser should fail, but didn't: {message}");
886            }
887
888            let actual_error = parse_result.map_err(|e| e.to_string()).unwrap_err();
889            let actual_error_count = actual_error.match_indices(expected_error).count();
890            if expected_error_count != actual_error_count {
891                eprintln!("Expected error message ({expected_error_count} times):");
892                eprintln!("{expected_error}");
893                eprintln!("Actual error message (found {actual_error_count} times):");
894                eprintln!("{actual_error}");
895                panic!("Additional context: {message}");
896            }
897        }
898    }
899
900    #[test]
901    fn parse_program_empty() {
902        TestCase {
903            input: "",
904            expected: vec![],
905            message: "empty string should parse as empty program",
906        }
907        .run();
908
909        TestCase {
910            input: "   ",
911            expected: vec![],
912            message: "spaces should parse as empty program",
913        }
914        .run();
915
916        TestCase {
917            input: "\n",
918            expected: vec![],
919            message: "linebreaks should parse as empty program (1)",
920        }
921        .run();
922
923        TestCase {
924            input: "   \n ",
925            expected: vec![],
926            message: "linebreaks should parse as empty program (2)",
927        }
928        .run();
929
930        TestCase {
931            input: "   \n \n",
932            expected: vec![],
933            message: "linebreaks should parse as empty program (3)",
934        }
935        .run();
936
937        TestCase {
938            input: "// empty program",
939            expected: vec![],
940            message: "single comment should parse as empty program",
941        }
942        .run();
943
944        TestCase {
945            input: "// empty program\n",
946            expected: vec![],
947            message: "single comment with linebreak should parse as empty program",
948        }
949        .run();
950
951        TestCase {
952            input: "// multi-line\n// comment",
953            expected: vec![],
954            message: "multiple comments should parse as empty program",
955        }
956        .run();
957
958        TestCase {
959            input: "// multi-line\n// comment\n ",
960            expected: vec![],
961            message: "multiple comments with trailing whitespace should parse as empty program",
962        }
963        .run();
964    }
965
966    #[proptest]
967    fn arbitrary_whitespace_and_comment_sequence_is_empty_program(whitespace: Vec<Whitespace>) {
968        let whitespace = whitespace.into_iter().join("");
969        TestCase {
970            input: &whitespace,
971            expected: vec![],
972            message: "arbitrary whitespace should parse as empty program",
973        }
974        .run();
975    }
976
977    #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, EnumCount, Arbitrary)]
978    enum Whitespace {
979        Space,
980        Tab,
981        CarriageReturn,
982        LineFeed,
983        CarriageReturnLineFeed,
984        Comment,
985    }
986
987    impl Display for Whitespace {
988        fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
989            match self {
990                Self::Space => write!(f, " "),
991                Self::Tab => write!(f, "\t"),
992                Self::CarriageReturn => write!(f, "\r"),
993                Self::LineFeed => writeln!(f),
994                Self::CarriageReturnLineFeed => writeln!(f, "\r"),
995                Self::Comment => writeln!(f, " // comment"),
996            }
997        }
998    }
999
1000    #[test]
1001    fn parse_program_whitespace() {
1002        NegativeTestCase {
1003            input: "poppop",
1004            expected_error: "n/a",
1005            expected_error_count: 0,
1006            message: "whitespace required between instructions (pop)",
1007        }
1008        .run();
1009
1010        NegativeTestCase {
1011            input: "dup 0dup 0",
1012            expected_error: "n/a",
1013            expected_error_count: 0,
1014            message: "whitespace required between instructions (dup 0)",
1015        }
1016        .run();
1017
1018        NegativeTestCase {
1019            input: "swap 10swap 10",
1020            expected_error: "n/a",
1021            expected_error_count: 0,
1022            message: "whitespace required between instructions (swap 10)",
1023        }
1024        .run();
1025
1026        NegativeTestCase {
1027            input: "push10",
1028            expected_error: "n/a",
1029            expected_error_count: 0,
1030            message: "push requires whitespace before its constant",
1031        }
1032        .run();
1033
1034        NegativeTestCase {
1035            input: "push 10pop",
1036            expected_error: "n/a",
1037            expected_error_count: 0,
1038            message: "push requires whitespace after its constant",
1039        }
1040        .run();
1041
1042        NegativeTestCase {
1043            input: "hello: callhello",
1044            expected_error: "n/a",
1045            expected_error_count: 0,
1046            message: "call requires whitespace before its label",
1047        }
1048        .run();
1049
1050        NegativeTestCase {
1051            input: "hello: popcall hello",
1052            expected_error: "n/a",
1053            expected_error_count: 0,
1054            message: "required space between pop and call",
1055        }
1056        .run();
1057    }
1058
1059    #[test]
1060    fn parse_program_label() {
1061        TestCase {
1062            input: "foo: call foo",
1063            expected: vec![Instruction::Call(bfe!(0)), Instruction::Call(bfe!(0))],
1064            message: "parse labels and calls to labels",
1065        }
1066        .run();
1067
1068        TestCase {
1069            input: "foo:call foo",
1070            expected: vec![Instruction::Call(bfe!(0)), Instruction::Call(bfe!(0))],
1071            message: "whitespace is not required after 'label:'",
1072        }
1073        .run();
1074
1075        // FIXME: Increase coverage of negative tests for duplicate labels.
1076        NegativeTestCase {
1077            input: "foo: pop 1 foo: pop 1 call foo",
1078            expected_error: "duplicate label",
1079            expected_error_count: 2,
1080            message: "labels cannot occur twice",
1081        }
1082        .run();
1083
1084        // FIXME: Increase coverage of negative tests for missing labels.
1085        NegativeTestCase {
1086            input: "foo: pop 1 call herp call derp",
1087            expected_error: "missing label",
1088            expected_error_count: 2,
1089            message: "non-existent labels cannot be called",
1090        }
1091        .run();
1092
1093        // FIXME: Increase coverage of negative tests for label/keyword overlap.
1094        NegativeTestCase {
1095            input: "pop: call pop",
1096            expected_error: "label must be neither instruction nor keyword",
1097            expected_error_count: 1,
1098            message: "label names may not overlap with instruction names",
1099        }
1100        .run();
1101
1102        TestCase {
1103            input: "pops: call pops",
1104            expected: vec![Instruction::Call(bfe!(0)), Instruction::Call(bfe!(0))],
1105            message: "labels that share a common prefix with instruction are labels",
1106        }
1107        .run();
1108
1109        TestCase {
1110            input: "_call: call _call",
1111            expected: vec![Instruction::Call(bfe!(0)), Instruction::Call(bfe!(0))],
1112            message: "labels that share a common suffix with instruction are labels",
1113        }
1114        .run();
1115    }
1116
1117    #[test]
1118    fn parse_program_nonexistent_instructions() {
1119        NegativeTestCase {
1120            input: "pop 0",
1121            expected_error: "expecting label, instruction or eof",
1122            expected_error_count: 1,
1123            message: "instruction `pop` cannot take argument `0`",
1124        }
1125        .run();
1126
1127        NegativeTestCase {
1128            input: "swap 16",
1129            expected_error: "expecting label, instruction or eof",
1130            expected_error_count: 1,
1131            message: "there is no swap 16 instruction",
1132        }
1133        .run();
1134
1135        NegativeTestCase {
1136            input: "dup 16",
1137            expected_error: "expecting label, instruction or eof",
1138            expected_error_count: 1,
1139            message: "there is no dup 16 instruction",
1140        }
1141        .run();
1142    }
1143
1144    #[test]
1145    fn parse_program_bracket_syntax() {
1146        NegativeTestCase {
1147            input: "foo: [foo]",
1148            expected_error: "expecting label, instruction or eof",
1149            expected_error_count: 1,
1150            message: "brackets as call syntax sugar are unsupported",
1151        }
1152        .run();
1153
1154        NegativeTestCase {
1155            input: "foo: [bar]",
1156            expected_error: "expecting label, instruction or eof",
1157            expected_error_count: 1,
1158            message: "brackets as call syntax sugar are unsupported",
1159        }
1160        .run();
1161    }
1162
1163    #[test]
1164    fn parse_program_label_must_start_with_alphabetic_character_or_underscore() {
1165        NegativeTestCase {
1166            input: "1foo: call 1foo",
1167            expected_error: "expecting label, instruction or eof",
1168            expected_error_count: 1,
1169            message: "labels cannot start with a digit",
1170        }
1171        .run();
1172
1173        NegativeTestCase {
1174            input: "-foo: call -foo",
1175            expected_error: "expecting label, instruction or eof",
1176            expected_error_count: 1,
1177            message: "labels cannot start with a dash",
1178        }
1179        .run();
1180
1181        TestCase {
1182            input: "_foo: call _foo",
1183            expected: vec![Instruction::Call(bfe!(0)), Instruction::Call(bfe!(0))],
1184            message: "labels can start with an underscore",
1185        }
1186        .run();
1187    }
1188
1189    #[test]
1190    fn parse_type_hint_with_zero_length_range() {
1191        NegativeTestCase {
1192            input: "hint foo: Type = stack[0..0]",
1193            expected_error: "range end must be greater than range start",
1194            expected_error_count: 1,
1195            message: "parse type hint with zero-length range",
1196        }
1197        .run();
1198    }
1199
1200    #[test]
1201    fn parse_type_hint_with_range_and_offset_and_missing_closing_bracket() {
1202        NegativeTestCase {
1203            input: "hint foo: Type = stack[2..5",
1204            expected_error: "expecting label, instruction or eof",
1205            expected_error_count: 1,
1206            message: "parse type hint with range and offset and missing closing bracket",
1207        }
1208        .run();
1209    }
1210
1211    #[test]
1212    fn parse_type_hint_with_range_and_offset_and_missing_opening_bracket() {
1213        NegativeTestCase {
1214            input: "hint foo: Type = stack2..5]",
1215            expected_error: "expecting label, instruction or eof",
1216            expected_error_count: 1,
1217            message: "parse type hint with range and offset and missing opening bracket",
1218        }
1219        .run();
1220    }
1221
1222    #[test]
1223    fn parse_type_hint_with_range_and_offset_and_missing_equals_sign() {
1224        NegativeTestCase {
1225            input: "hint foo: Type stack[2..5];",
1226            expected_error: "expecting label, instruction or eof",
1227            expected_error_count: 1,
1228            message: "parse type hint with range and offset and missing equals sign",
1229        }
1230        .run();
1231    }
1232
1233    #[test]
1234    fn parse_type_hint_with_range_and_offset_and_missing_type_name() {
1235        NegativeTestCase {
1236            input: "hint foo: = stack[2..5]",
1237            expected_error: "expecting label, instruction or eof",
1238            expected_error_count: 1,
1239            message: "parse type hint with range and offset and missing type name",
1240        }
1241        .run();
1242    }
1243
1244    #[test]
1245    fn parse_type_hint_with_range_and_offset_and_missing_variable_name() {
1246        NegativeTestCase {
1247            input: "hint : Type = stack[2..5]",
1248            expected_error: "label must be neither instruction nor keyword",
1249            expected_error_count: 1,
1250            message: "parse type hint with range and offset and missing variable name",
1251        }
1252        .run();
1253    }
1254
1255    #[test]
1256    fn parse_type_hint_with_range_and_offset_and_missing_colon() {
1257        NegativeTestCase {
1258            input: "hint foo Type = stack[2..5]",
1259            expected_error: "expecting label, instruction or eof",
1260            expected_error_count: 1,
1261            message: "parse type hint with range and offset and missing colon",
1262        }
1263        .run();
1264    }
1265
1266    #[test]
1267    fn parse_type_hint_with_range_and_offset_and_missing_hint() {
1268        NegativeTestCase {
1269            input: "foo: Type = stack[2..5];",
1270            expected_error: "expecting label, instruction or eof",
1271            expected_error_count: 1,
1272            message: "parse type hint with range and offset and missing hint",
1273        }
1274        .run();
1275    }
1276
1277    #[test]
1278    fn parse_simple_assertion_contexts() {
1279        TestCase {
1280            input: "assert",
1281            expected: vec![Instruction::Assert],
1282            message: "naked assert",
1283        }
1284        .run();
1285        TestCase {
1286            input: "assert_vector",
1287            expected: vec![Instruction::AssertVector],
1288            message: "naked assert_vector",
1289        }
1290        .run();
1291
1292        TestCase {
1293            input: "assert error_id 42",
1294            expected: vec![Instruction::Assert],
1295            message: "assert, then id",
1296        }
1297        .run();
1298        TestCase {
1299            input: "assert_vector error_id 42",
1300            expected: vec![Instruction::AssertVector],
1301            message: "assert_vector, then id",
1302        }
1303        .run();
1304
1305        TestCase {
1306            input: "assert error_id -42",
1307            expected: vec![Instruction::Assert],
1308            message: "assert, then negative id",
1309        }
1310        .run();
1311        TestCase {
1312            input: "assert_vector error_id -42",
1313            expected: vec![Instruction::AssertVector],
1314            message: "assert_vector, then negative id",
1315        }
1316        .run();
1317
1318        TestCase {
1319            input: "assert error_id 42 nop",
1320            expected: vec![Instruction::Assert, Instruction::Nop],
1321            message: "assert, then id, then nop",
1322        }
1323        .run();
1324        TestCase {
1325            input: "assert_vector error_id 42 nop",
1326            expected: vec![Instruction::AssertVector, Instruction::Nop],
1327            message: "assert_vector, then id, then nop",
1328        }
1329        .run();
1330    }
1331
1332    #[test]
1333    fn assertion_context_error_id_can_handle_edge_case_ids() {
1334        let instructions = [
1335            ("assert", Instruction::Assert),
1336            ("assert_vector", Instruction::AssertVector),
1337        ];
1338        let ids = [i128::MIN, -1, 0, 1, i128::MAX];
1339
1340        for ((instruction_str, instruction), id) in instructions.into_iter().cartesian_product(ids)
1341        {
1342            TestCase {
1343                input: &format!("{instruction_str} error_id {id}"),
1344                expected: vec![instruction],
1345                message: "assert, then edge case id",
1346            }
1347            .run();
1348        }
1349    }
1350
1351    #[test]
1352    fn assertion_context_error_id_fails_on_too_large_error_ids() {
1353        NegativeTestCase {
1354            input: "assert error_id -170141183460469231731687303715884105729",
1355            expected_error: "expecting label, instruction or eof",
1356            expected_error_count: 1,
1357            message: "error id smaller than i128::MIN",
1358        }
1359        .run();
1360        NegativeTestCase {
1361            input: "assert error_id 170141183460469231731687303715884105728",
1362            expected_error: "expecting label, instruction or eof",
1363            expected_error_count: 1,
1364            message: "error id larger than i128::MAX",
1365        }
1366        .run();
1367    }
1368
1369    #[proptest]
1370    fn assertion_context_error_id_can_handle_any_valid_id(id: i128) {
1371        TestCase {
1372            input: &format!("assert error_id {id}"),
1373            expected: vec![Instruction::Assert],
1374            message: "assert, then random id",
1375        }
1376        .run();
1377    }
1378
1379    #[test]
1380    fn parse_erroneous_assertion_contexts_for_assert() {
1381        NegativeTestCase {
1382            input: "assert error_id",
1383            expected_error: "expecting label, instruction or eof",
1384            expected_error_count: 1,
1385            message: "missing id after keyword `error_id`",
1386        }
1387        .run();
1388        NegativeTestCase {
1389            input: "assert error id 42",
1390            expected_error: "expecting label, instruction or eof",
1391            expected_error_count: 1,
1392            message: "incorrect keyword `error id`",
1393        }
1394        .run();
1395        NegativeTestCase {
1396            input: "error_id 42",
1397            expected_error: "incorrectly placed assertion context",
1398            expected_error_count: 1,
1399            message: "standalone assertion context",
1400        }
1401        .run();
1402        NegativeTestCase {
1403            input: "error_id 42 error_id 42",
1404            expected_error: "incorrectly placed assertion context",
1405            expected_error_count: 2,
1406            message: "multiple standalone assertion contexts",
1407        }
1408        .run();
1409        NegativeTestCase {
1410            input: "nop error_id 42",
1411            expected_error: "incorrectly placed assertion context",
1412            expected_error_count: 1,
1413            message: "error id without assertion",
1414        }
1415        .run();
1416        NegativeTestCase {
1417            input: "assert error_id 42 error_id 1337",
1418            expected_error: "incorrectly placed assertion context",
1419            expected_error_count: 1,
1420            message: "multiple error ids for the same assertion",
1421        }
1422        .run();
1423        NegativeTestCase {
1424            input: "assert_vector error_id 42 error_id 1337",
1425            expected_error: "incorrectly placed assertion context",
1426            expected_error_count: 1,
1427            message: "multiple error ids for the same vector assertion",
1428        }
1429        .run();
1430        NegativeTestCase {
1431            input: "error_id 42 assert",
1432            expected_error: "incorrectly placed assertion context",
1433            expected_error_count: 1,
1434            message: "error id precedes assertion",
1435        }
1436        .run();
1437        NegativeTestCase {
1438            input: "error_id 42 assert error_id 1337",
1439            expected_error: "incorrectly placed assertion context",
1440            expected_error_count: 1,
1441            message: "first error id precedes assertion",
1442        }
1443        .run();
1444    }
1445
1446    #[proptest]
1447    fn assertion_context_error_id_fails_for_invalid_id(
1448        #[strategy(proptest::strategy::Union::new(["assert", "assert_vector"]))]
1449        instruction: String,
1450        #[filter(#id.parse::<i128>().is_err())] id: String,
1451    ) {
1452        // A valid ID followed by a comment is valid, and therefore not relevant here.
1453        // This might be a monkey-patch, but I don't know how to generate better input.
1454        if let Some(comment_idx) = id.find("//") {
1455            let (id, _) = id.split_at(comment_idx);
1456            prop_assume!(id.parse::<i128>().is_err());
1457        }
1458
1459        // not using `NegativeTest`: different `id`s trigger different errors
1460        let input = format!("{instruction} error_id {id}");
1461        prop_assert!(parse(&input).is_err());
1462    }
1463
1464    #[proptest]
1465    fn type_hint_to_string_to_type_hint_is_identity(#[strategy(arb())] type_hint: TypeHint) {
1466        let type_hint_string = type_hint.to_string();
1467        let instruction_tokens =
1468            parse(&type_hint_string).map_err(|err| TestCaseError::Fail(format!("{err}").into()))?;
1469        let labelled_instructions = to_labelled_instructions(&instruction_tokens);
1470        prop_assert_eq!(1, labelled_instructions.len());
1471        let first_labelled_instruction = labelled_instructions[0].clone();
1472        let_assert!(LabelledInstruction::TypeHint(parsed_type_hint) = first_labelled_instruction);
1473        prop_assert_eq!(type_hint, parsed_type_hint);
1474    }
1475
1476    #[proptest]
1477    fn assertion_context_to_string_to_assertion_context_is_identity(
1478        #[strategy(arb())] context: AssertionContext,
1479    ) {
1480        let assert_with_context = format!("assert {context}");
1481        let instruction_tokens = parse(&assert_with_context)
1482            .map_err(|err| TestCaseError::Fail(format!("{err}").into()))?;
1483        let labelled_instructions = to_labelled_instructions(&instruction_tokens);
1484        prop_assert_eq!(2, labelled_instructions.len());
1485        let_assert!(LabelledInstruction::AssertionContext(parsed) = &labelled_instructions[1]);
1486        prop_assert_eq!(&context, parsed);
1487    }
1488
1489    #[test]
1490    fn triton_asm_macro() {
1491        let instructions = triton_asm!(write_io 3 push 17 call huh lt swap 3);
1492        assert_eq!(
1493            LabelledInstruction::Instruction(AnInstruction::WriteIo(NumberOfWords::N3)),
1494            instructions[0]
1495        );
1496        assert_eq!(
1497            LabelledInstruction::Instruction(AnInstruction::Push(bfe!(17))),
1498            instructions[1]
1499        );
1500        assert_eq!(
1501            LabelledInstruction::Instruction(AnInstruction::Call("huh".to_string())),
1502            instructions[2]
1503        );
1504        assert_eq!(
1505            LabelledInstruction::Instruction(AnInstruction::Lt),
1506            instructions[3]
1507        );
1508        assert_eq!(
1509            LabelledInstruction::Instruction(AnInstruction::Swap(OpStackElement::ST3)),
1510            instructions[4]
1511        );
1512    }
1513
1514    #[test]
1515    fn triton_asm_macro_with_a_single_return() {
1516        let instructions = triton_asm!(return);
1517        assert_eq!(
1518            LabelledInstruction::Instruction(AnInstruction::Return),
1519            instructions[0]
1520        );
1521    }
1522
1523    #[test]
1524    fn triton_asm_macro_with_a_single_assert() {
1525        let instructions = triton_asm!(assert);
1526        assert_eq!(
1527            LabelledInstruction::Instruction(AnInstruction::Assert),
1528            instructions[0]
1529        );
1530    }
1531
1532    #[test]
1533    fn triton_asm_macro_with_only_assert_and_return() {
1534        let instructions = triton_asm!(assert return);
1535        assert_eq!(
1536            LabelledInstruction::Instruction(AnInstruction::Assert),
1537            instructions[0]
1538        );
1539        assert_eq!(
1540            LabelledInstruction::Instruction(AnInstruction::Return),
1541            instructions[1]
1542        );
1543    }
1544
1545    #[test]
1546    fn triton_program_macro() {
1547        let program = triton_program!(
1548            // main entry point
1549            call routine_1
1550            call routine_2 // inline comment
1551            halt
1552
1553            // single line content regarding subroutine 1
1554            routine_1:
1555                /*
1556                 * multi-line comment
1557                 */
1558                call routine_3
1559                return
1560
1561            // subroutine 2 starts here
1562            routine_2:
1563                push 18 push 7 add
1564                push 25 eq assert
1565                return
1566
1567            routine_3:
1568            alternative_label:
1569                nop nop
1570                return
1571        );
1572        println!("{program}");
1573    }
1574
1575    #[test]
1576    fn triton_program_macro_interpolates_various_types() {
1577        let push_arg = rand::rng().random_range(0..BFieldElement::P);
1578        let instruction_push =
1579            LabelledInstruction::Instruction(AnInstruction::Push(push_arg.into()));
1580        let swap_argument = "1";
1581        triton_program!({instruction_push} push {push_arg} swap {swap_argument} eq assert halt);
1582    }
1583
1584    #[test]
1585    fn triton_instruction_macro_parses_simple_instructions() {
1586        assert_eq!(
1587            LabelledInstruction::Instruction(AnInstruction::Halt),
1588            triton_instr!(halt)
1589        );
1590        assert_eq!(
1591            LabelledInstruction::Instruction(AnInstruction::Add),
1592            triton_instr!(add)
1593        );
1594        assert_eq!(
1595            LabelledInstruction::Instruction(AnInstruction::Pop(NumberOfWords::N3)),
1596            triton_instr!(pop 3)
1597        );
1598    }
1599
1600    #[test]
1601    #[should_panic(expected = "not_an_instruction")]
1602    fn triton_instruction_macro_fails_when_encountering_unknown_instruction() {
1603        triton_instr!(not_an_instruction);
1604    }
1605
1606    #[test]
1607    fn triton_instruction_macro_parses_instructions_with_argument() {
1608        assert_eq!(
1609            LabelledInstruction::Instruction(AnInstruction::Push(bfe!(7))),
1610            triton_instr!(push 7)
1611        );
1612        assert_eq!(
1613            LabelledInstruction::Instruction(AnInstruction::Dup(OpStackElement::ST3)),
1614            triton_instr!(dup 3)
1615        );
1616        assert_eq!(
1617            LabelledInstruction::Instruction(AnInstruction::Swap(OpStackElement::ST5)),
1618            triton_instr!(swap 5)
1619        );
1620        assert_eq!(
1621            LabelledInstruction::Instruction(AnInstruction::Call("my_label".to_string())),
1622            triton_instr!(call my_label)
1623        );
1624    }
1625
1626    #[test]
1627    fn triton_asm_macro_can_repeat_instructions() {
1628        let instructions = triton_asm![push 42; 3];
1629        let expected_instructions =
1630            vec![LabelledInstruction::Instruction(AnInstruction::Push(bfe!(42))); 3];
1631        assert_eq!(expected_instructions, instructions);
1632
1633        let instructions = triton_asm![read_io 2; 15];
1634        let expected_instructions =
1635            vec![LabelledInstruction::Instruction(AnInstruction::ReadIo(NumberOfWords::N2)); 15];
1636        assert_eq!(expected_instructions, instructions);
1637
1638        let instructions = triton_asm![divine 3; Digest::LEN];
1639        let expected_instructions =
1640            vec![
1641                LabelledInstruction::Instruction(AnInstruction::Divine(NumberOfWords::N3));
1642                Digest::LEN
1643            ];
1644        assert_eq!(expected_instructions, instructions);
1645    }
1646
1647    #[test]
1648    fn break_gets_turned_into_labelled_instruction() {
1649        let instructions = triton_asm![break];
1650        let expected_instructions = vec![LabelledInstruction::Breakpoint];
1651        assert_eq!(expected_instructions, instructions);
1652    }
1653
1654    #[test]
1655    fn break_does_not_propagate_to_full_program() {
1656        let program = triton_program! { break halt break };
1657        assert_eq!(1, program.len_bwords());
1658    }
1659}
triton_isa/parser.rs

triton_isa/
parser.rs