triton_opcodes/
parser.rs

1use std::collections::HashMap;
2use std::collections::HashSet;
3use std::error::Error;
4
5use nom::branch::alt;
6use nom::bytes::complete::tag;
7use nom::bytes::complete::take_while;
8use nom::bytes::complete::take_while1;
9use nom::character::complete::digit1;
10use nom::combinator::cut;
11use nom::combinator::eof;
12use nom::combinator::fail;
13use nom::combinator::opt;
14use nom::error::context;
15use nom::error::convert_error;
16use nom::error::ErrorKind;
17use nom::error::VerboseError;
18use nom::error::VerboseErrorKind;
19use nom::multi::many0;
20use nom::multi::many1;
21use nom::Finish;
22use nom::IResult;
23use twenty_first::shared_math::b_field_element::BFieldElement;
24
25use crate::instruction::AnInstruction;
26use crate::instruction::AnInstruction::*;
27use crate::instruction::LabelledInstruction;
28use crate::instruction::ALL_INSTRUCTION_NAMES;
29use crate::ord_n::Ord16;
30use crate::ord_n::Ord16::*;
31
32#[derive(Debug, PartialEq)]
33pub struct ParseError<'a> {
34    pub input: &'a str,
35    pub errors: VerboseError<&'a str>,
36}
37
38/// A `ParsedInstruction` has `call` addresses encoded as label names.
39#[derive(Debug, Clone, PartialEq, Eq, Hash)]
40pub enum ParsedInstruction<'a> {
41    Instruction(AnInstruction<String>, &'a str),
42    Label(String, &'a str),
43}
44
45impl<'a> std::fmt::Display for ParsedInstruction<'a> {
46    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47        match self {
48            ParsedInstruction::Instruction(instr, _) => write!(f, "{instr}"),
49            ParsedInstruction::Label(label_name, _) => write!(f, "{label_name}:"),
50        }
51    }
52}
53
54impl<'a> std::fmt::Display for ParseError<'a> {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        write!(f, "{}", pretty_print_error(self.input, self.errors.clone()))
57    }
58}
59
60impl<'a> Error for ParseError<'a> {}
61
62impl<'a> ParsedInstruction<'a> {
63    pub fn token_str(&self) -> &'a str {
64        match self {
65            ParsedInstruction::Instruction(_, token_str) => token_str,
66            ParsedInstruction::Label(_, token_str) => token_str,
67        }
68    }
69
70    pub fn to_labelled(&self) -> LabelledInstruction {
71        use ParsedInstruction::*;
72        match self {
73            Instruction(instr, _) => LabelledInstruction::Instruction(instr.to_owned()),
74            Label(label, _) => LabelledInstruction::Label(label.to_owned()),
75        }
76    }
77}
78
79pub fn to_labelled(instructions: &[ParsedInstruction]) -> Vec<LabelledInstruction> {
80    instructions
81        .iter()
82        .map(|instruction| instruction.to_labelled())
83        .collect()
84}
85
86/// Pretty-print a parse error
87///
88/// This function wraps `convert_error()`.
89///
90/// `VerboseError` accumulates each nested contexts in which an error occurs.
91///
92/// Every `fail()` is wrapped in a `context()`, so by skipping the root `ErrorKind::Fail`s
93/// and `ErrorKind::Eof`s, manually triggered custom errors are only shown in the output
94/// once with the `context()` message.
95pub fn pretty_print_error(s: &str, mut e: VerboseError<&str>) -> String {
96    let (_root_s, root_error) = e.errors[0].clone();
97    if matches!(root_error, VerboseErrorKind::Nom(ErrorKind::Fail))
98        || matches!(root_error, VerboseErrorKind::Nom(ErrorKind::Eof))
99    {
100        e.errors.remove(0);
101    }
102    convert_error(s, e)
103}
104
105/// Parse a program
106pub fn parse(input: &str) -> Result<Vec<ParsedInstruction>, ParseError> {
107    let instructions = match program(input).finish() {
108        Ok((_s, instructions)) => Ok(instructions),
109        Err(errors) => Err(ParseError { input, errors }),
110    }?;
111
112    scan_missing_duplicate_labels(input, &instructions)?;
113
114    Ok(instructions)
115}
116
117fn scan_missing_duplicate_labels<'a>(
118    input: &'a str,
119    instructions: &[ParsedInstruction<'a>],
120) -> Result<(), ParseError<'a>> {
121    let mut seen: HashMap<&str, ParsedInstruction> = HashMap::default();
122    let mut duplicates: HashSet<ParsedInstruction> = HashSet::default();
123    let mut missings: HashSet<ParsedInstruction> = HashSet::default();
124
125    // Find duplicate labels, including the first occurrence of each duplicate.
126    for instruction in instructions.iter() {
127        if let ParsedInstruction::Label(label, _token_s) = instruction {
128            if let Some(first_label) = seen.get(label.as_str()) {
129                duplicates.insert(first_label.to_owned());
130                duplicates.insert(instruction.to_owned());
131            } else {
132                seen.insert(label.as_str(), instruction.to_owned());
133            }
134        }
135    }
136
137    // Find missing labels
138    for instruction in instructions.iter() {
139        if let ParsedInstruction::Instruction(Call(addr), _token_s) = instruction {
140            if !seen.contains_key(addr.as_str()) {
141                missings.insert(instruction.to_owned());
142            }
143        }
144    }
145
146    if duplicates.is_empty() && missings.is_empty() {
147        return Ok(());
148    }
149
150    // Collect duplicate and missing error messages
151    let mut errors: Vec<(&str, VerboseErrorKind)> =
152        Vec::with_capacity(duplicates.len() + missings.len());
153
154    for duplicate in duplicates {
155        let error = (
156            duplicate.token_str(),
157            VerboseErrorKind::Context("duplicate label"),
158        );
159        errors.push(error);
160    }
161
162    for missing in missings {
163        let error = (
164            missing.token_str(),
165            VerboseErrorKind::Context("missing label"),
166        );
167        errors.push(error);
168    }
169
170    let errors = VerboseError { errors };
171    Err(ParseError { input, errors })
172}
173
174/// Auxiliary type alias: `IResult` defaults to `nom::error::Error` as concrete
175/// error type, but we want `nom::error::VerboseError` as it allows `context()`.
176type ParseResult<'input, Out> = IResult<&'input str, Out, VerboseError<&'input str>>;
177
178fn program(s: &str) -> ParseResult<Vec<ParsedInstruction>> {
179    let (s, _) = comment_or_whitespace0(s)?;
180    let (s, instructions) = many0(alt((label, labelled_instruction)))(s)?;
181    let (s, _) = context("expecting label, instruction or eof", eof)(s)?;
182
183    Ok((s, instructions))
184}
185
186fn labelled_instruction(s_instr: &str) -> ParseResult<ParsedInstruction> {
187    let (s, instr) = an_instruction(s_instr)?;
188    Ok((s, ParsedInstruction::Instruction(instr, s_instr)))
189}
190
191fn label(label_s: &str) -> ParseResult<ParsedInstruction> {
192    let (s, addr) = label_addr(label_s)?;
193    let (s, _) = token0(":")(s)?; // don't require space after ':'
194
195    // Checking if `<label>:` is an instruction must happen after parsing `:`, since otherwise
196    // `cut` will reject the alternative parser of `label`, being `labelled_instruction`, which
197    // *is* allowed to contain valid instruction names.
198    if is_instruction_name(&addr) {
199        return cut(context("label cannot be named after instruction", fail))(label_s);
200    }
201
202    Ok((s, ParsedInstruction::Label(addr, label_s)))
203}
204
205fn an_instruction(s: &str) -> ParseResult<AnInstruction<String>> {
206    // OpStack manipulation
207    let pop = instruction("pop", Pop);
208    let push = push_instruction();
209    let divine = instruction("divine", Divine);
210    let dup = dup_instruction();
211    let swap = swap_instruction();
212
213    let opstack_manipulation = alt((pop, push, dup, swap));
214
215    // Control flow
216    let nop = instruction("nop", Nop);
217    let skiz = instruction("skiz", Skiz);
218    let call = call_instruction();
219    let return_ = instruction("return", Return);
220    let recurse = instruction("recurse", Recurse);
221    let assert_ = instruction("assert", Assert);
222    let halt = instruction("halt", Halt);
223
224    let control_flow = alt((nop, skiz, call, return_, recurse, halt));
225
226    // Memory access
227    let read_mem = instruction("read_mem", ReadMem);
228    let write_mem = instruction("write_mem", WriteMem);
229
230    let memory_access = alt((read_mem, write_mem));
231
232    // Hashing-related instructions
233    let hash = instruction("hash", Hash);
234    let divine_sibling = instruction("divine_sibling", DivineSibling);
235    let assert_vector = instruction("assert_vector", AssertVector);
236    let absorb_init = instruction("absorb_init", AbsorbInit);
237    let absorb = instruction("absorb", Absorb);
238    let squeeze = instruction("squeeze", Squeeze);
239
240    let hashing_related = alt((hash, divine_sibling, absorb_init, absorb, squeeze));
241
242    // Arithmetic on stack instructions
243    let add = instruction("add", Add);
244    let mul = instruction("mul", Mul);
245    let invert = instruction("invert", Invert);
246    let eq = instruction("eq", Eq);
247    let split = instruction("split", Split);
248    let lt = instruction("lt", Lt);
249    let and = instruction("and", And);
250    let xor = instruction("xor", Xor);
251    let log_2_floor = instruction("log_2_floor", Log2Floor);
252    let pow = instruction("pow", Pow);
253    let div = instruction("div", Div);
254    let pop_count = instruction("pop_count", PopCount);
255    let xxadd = instruction("xxadd", XxAdd);
256    let xxmul = instruction("xxmul", XxMul);
257    let xinvert = instruction("xinvert", XInvert);
258    let xbmul = instruction("xbmul", XbMul);
259
260    let base_field_arithmetic_on_stack = alt((add, mul, invert, eq));
261    let bitwise_arithmetic_on_stack = alt((split, lt, and, xor, log_2_floor, pow, div, pop_count));
262    let extension_field_arithmetic_on_stack = alt((xxadd, xxmul, xinvert, xbmul));
263    let arithmetic_on_stack = alt((
264        base_field_arithmetic_on_stack,
265        bitwise_arithmetic_on_stack,
266        extension_field_arithmetic_on_stack,
267    ));
268
269    // Read/write
270    let read_io = instruction("read_io", ReadIo);
271    let write_io = instruction("write_io", WriteIo);
272
273    let read_write = alt((read_io, write_io));
274
275    // Because of common prefixes, the following parsers are sensitive to order:
276    //
277    // Successfully parsing "assert" before trying "assert_vector" can lead to
278    // picking the wrong one. By trying them in the order of longest first, less
279    // backtracking is necessary.
280    let syntax_ambiguous = alt((assert_vector, assert_, divine));
281
282    alt((
283        opstack_manipulation,
284        control_flow,
285        memory_access,
286        hashing_related,
287        arithmetic_on_stack,
288        read_write,
289        syntax_ambiguous,
290    ))(s)
291}
292
293fn is_instruction_name(s: &str) -> bool {
294    ALL_INSTRUCTION_NAMES.contains(&s)
295}
296
297fn instruction<'a>(
298    name: &'a str,
299    instruction: AnInstruction<String>,
300) -> impl Fn(&'a str) -> ParseResult<AnInstruction<String>> {
301    move |s: &'a str| {
302        let (s, _) = token1(name)(s)?; // require space after instruction name
303        Ok((s, instruction.clone()))
304    }
305}
306
307fn push_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
308    move |s: &str| {
309        let (s, _) = token1("push")(s)?; // require space after instruction name
310        let (s, elem) = field_element(s)?;
311        let (s, _) = comment_or_whitespace1(s)?; // require space after field element
312
313        Ok((s, Push(elem)))
314    }
315}
316
317fn dup_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
318    move |s: &str| {
319        let (s, _) = token1("dup")(s)?; // require space before argument
320        let (s, stack_register) = stack_register(s)?;
321        let (s, _) = comment_or_whitespace1(s)?;
322
323        Ok((s, Dup(stack_register)))
324    }
325}
326
327fn swap_instruction() -> impl Fn(&str) -> ParseResult<AnInstruction<String>> {
328    move |s: &str| {
329        let (s, _) = token1("swap")(s)?; // require space before argument
330        let (s, stack_register) = stack_register(s)?;
331        let (s, _) = comment_or_whitespace1(s)?;
332
333        if stack_register == ST0 {
334            return cut(context("instruction `swap` cannot take argument `0`", fail))(s);
335        }
336
337        Ok((s, Swap(stack_register)))
338    }
339}
340
341fn call_instruction<'a>() -> impl Fn(&'a str) -> ParseResult<AnInstruction<String>> {
342    let call_syntax = move |s: &'a str| {
343        let (s_label, _) = token1("call")(s)?; // require space before called label
344        let (s, addr) = label_addr(s_label)?;
345        let (s, _) = comment_or_whitespace1(s)?; // require space after called label
346
347        Ok((s, addr))
348    };
349
350    let bracket_syntax = move |s: &'a str| {
351        let (s, _) = tag("[")(s)?;
352        let (s, addr) = label_addr(s)?;
353        let (s, _) = token0("]")(s)?;
354
355        Ok((s, addr))
356    };
357
358    move |s: &'a str| {
359        let (s, addr) = alt((call_syntax, bracket_syntax))(s)?;
360
361        // This check cannot be moved into `label_addr`, since `label_addr` is shared
362        // between the scenarios `<label>:` and `call <label>`; the former requires
363        // parsing the `:` before rejecting a possible instruction name in the label.
364        if is_instruction_name(&addr) {
365            return cut(context("label cannot be named after instruction", fail))(s);
366        }
367
368        Ok((s, Call(addr)))
369    }
370}
371
372fn field_element(s_orig: &str) -> ParseResult<BFieldElement> {
373    let (s, negative) = opt(tag("-"))(s_orig)?;
374    let (s, n) = digit1(s)?;
375
376    let mut n: i128 = match n.parse() {
377        Ok(n) => n,
378        Err(_err) => {
379            return context("out-of-bounds constant", fail)(s);
380        }
381    };
382
383    let quotient = BFieldElement::P as i128;
384    if n >= quotient {
385        return context("out-of-bounds constant", fail)(s_orig);
386    }
387
388    if negative.is_some() {
389        n *= -1;
390        n += quotient;
391    }
392
393    Ok((s, BFieldElement::new(n as u64)))
394}
395
396fn stack_register(s: &str) -> ParseResult<Ord16> {
397    let (s, n) = digit1(s)?;
398    let stack_register = match n {
399        "0" => ST0,
400        "1" => ST1,
401        "2" => ST2,
402        "3" => ST3,
403        "4" => ST4,
404        "5" => ST5,
405        "6" => ST6,
406        "7" => ST7,
407        "8" => ST8,
408        "9" => ST9,
409        "10" => ST10,
410        "11" => ST11,
411        "12" => ST12,
412        "13" => ST13,
413        "14" => ST14,
414        "15" => ST15,
415        _ => return context("using an out-of-bounds stack register (0-15 exist)", fail)(s),
416    };
417
418    Ok((s, stack_register))
419}
420
421/// Parse a label address. This is used in "`<label>:`" and in "`call <label>`".
422fn label_addr(s_orig: &str) -> ParseResult<String> {
423    let (s, addr_part_0) = take_while1(is_label_start_char)(s_orig)?;
424    if addr_part_0.is_empty() {
425        // todo: this error is never shown to the user, since the `label` parser is wrapped in an
426        //  `alt`. With a custom error type, it is possible to have alt return the error of the
427        //  parser that went the farthest in the input data.
428        let failure_reason = "label must start with an alphabetic character or underscore";
429        return context(failure_reason, fail)(s_orig);
430    }
431    let (s, addr_part_1) = take_while(is_label_char)(s)?;
432
433    Ok((s, format!("{}{}", addr_part_0, addr_part_1)))
434}
435
436fn is_label_start_char(c: char) -> bool {
437    c.is_alphabetic() || c == '_'
438}
439
440fn is_label_char(c: char) -> bool {
441    c.is_alphanumeric() || c == '_' || c == '-'
442}
443
444/// Parse 0 or more comments and/or whitespace.
445///
446/// This is used after places where whitespace is optional (e.g. after ':').
447fn comment_or_whitespace0(s: &str) -> ParseResult<&str> {
448    let (s, _) = many0(alt((comment1, whitespace1)))(s)?;
449    Ok((s, ""))
450}
451
452/// Parse at least one comment and/or whitespace, or eof.
453///
454/// This is used after places where whitespace is mandatory (e.g. after tokens).
455fn comment_or_whitespace1<'a>(s: &'a str) -> ParseResult<&'a str> {
456    let cws1 = |s: &'a str| -> ParseResult<&str> {
457        let (s, _) = many1(alt((comment1, whitespace1)))(s)?;
458        Ok((s, ""))
459    };
460    alt((eof, cws1))(s)
461}
462
463/// Parse one comment (not including the linebreak)
464fn comment1(s: &str) -> ParseResult<()> {
465    let (s, _) = tag("//")(s)?;
466    let (s, _) = take_while(|c| !is_linebreak(c))(s)?;
467    Ok((s, ()))
468}
469
470/// Parse at least one whitespace character
471fn whitespace1(s: &str) -> ParseResult<()> {
472    let (s, _) = take_while1(|c: char| c.is_whitespace())(s)?;
473    Ok((s, ()))
474}
475
476fn is_linebreak(c: char) -> bool {
477    c == '\r' || c == '\n'
478}
479
480/// `token0(tok)` will parse the string `tok` and munch 0 or more comment or whitespace.
481fn token0<'a>(token: &'a str) -> impl Fn(&'a str) -> ParseResult<()> {
482    move |s: &'a str| {
483        let (s, _) = tag(token)(s)?;
484        let (s, _) = comment_or_whitespace0(s)?;
485        Ok((s, ()))
486    }
487}
488
489/// `token1(tok)` will parse the string `tok` and munch at least one comment and/or whitespace,
490/// or eof.
491fn token1<'a>(token: &'a str) -> impl Fn(&'a str) -> ParseResult<()> {
492    move |s: &'a str| {
493        let (s, _) = tag(token)(s)?;
494        let (s, _) = comment_or_whitespace1(s)?;
495        Ok((s, ()))
496    }
497}
498
499#[cfg(test)]
500pub mod parser_tests {
501    use itertools::Itertools;
502    use rand::distributions::WeightedIndex;
503    use rand::prelude::*;
504    use rand::Rng;
505
506    use LabelledInstruction::*;
507
508    use crate::program::Program;
509
510    use super::*;
511
512    struct TestCase<'a> {
513        input: &'a str,
514        expected: Program,
515        message: &'static str,
516    }
517
518    struct NegativeTestCase<'a> {
519        input: &'a str,
520        expected_error: &'static str,
521        expected_error_count: usize,
522        message: &'static str,
523    }
524
525    fn parse_program_prop(test_case: TestCase) {
526        match parse(test_case.input) {
527            Ok(actual) => assert_eq!(
528                test_case.expected,
529                Program::new(&to_labelled(&actual)),
530                "{}",
531                test_case.message
532            ),
533            Err(parse_err) => panic!("{}:\n{parse_err}", test_case.message),
534        }
535    }
536
537    fn parse_program_neg_prop(test_case: NegativeTestCase) {
538        let result = parse(test_case.input);
539        if result.is_ok() {
540            eprintln!("parser input: {}", test_case.input);
541            eprintln!("parser output: {:?}", result.unwrap());
542            panic!("parser should fail, but didn't: {}", test_case.message);
543        }
544
545        let error = result.unwrap_err();
546        let actual_error_message = format!("{error}");
547        let actual_error_count = actual_error_message
548            .match_indices(test_case.expected_error)
549            .count();
550        if test_case.expected_error_count != actual_error_count {
551            eprintln!("Actual error message:");
552            eprintln!("{actual_error_message}");
553            assert_eq!(
554                test_case.expected_error_count, actual_error_count,
555                "parser should report '{}' {} times: {}",
556                test_case.expected_error, test_case.expected_error_count, test_case.message
557            )
558        }
559    }
560
561    fn whitespace_gen(max_size: usize) -> String {
562        let mut rng = rand::thread_rng();
563        let spaces = [" ", "\t", "\r", "\r\n", "\n", " // comment\n"];
564        let weights = [5, 1, 1, 1, 2, 1];
565        assert_eq!(spaces.len(), weights.len(), "all generators have weights");
566        let dist = WeightedIndex::new(weights).expect("a weighted distribution of generators");
567        let size = rng.gen_range(1..=std::cmp::max(1, max_size));
568        (0..size).map(|_| spaces[dist.sample(&mut rng)]).collect()
569    }
570
571    fn label_gen(size: usize) -> String {
572        let mut rng = rand::thread_rng();
573        let mut new_label = || -> String { (0..size).map(|_| rng.gen_range('a'..='z')).collect() };
574        let mut label = new_label();
575        while is_instruction_name(&label) {
576            label = new_label();
577        }
578        label
579    }
580
581    fn new_label_gen(labels: &mut Vec<String>) -> String {
582        let mut rng = rand::thread_rng();
583        let count = labels.len() * 3 / 2;
584        let index = rng.gen_range(0..=count);
585
586        labels.get(index).cloned().unwrap_or_else(|| {
587            let label_size = 4;
588            let new_label = label_gen(label_size);
589            labels.push(new_label.clone());
590            new_label
591        })
592    }
593
594    fn instruction_gen(labels: &mut Vec<String>) -> Vec<String> {
595        let mut rng = thread_rng();
596
597        let difficult_instructions = vec!["push", "dup", "swap", "skiz", "call"];
598        let simple_instructions = ALL_INSTRUCTION_NAMES
599            .into_iter()
600            .filter(|name| !difficult_instructions.contains(name))
601            .collect_vec();
602
603        let generators = vec![vec!["simple"], difficult_instructions].concat();
604        // Test difficult instructions more frequently.
605        let weights = vec![simple_instructions.len(), 2, 6, 6, 2, 10];
606
607        assert_eq!(
608            generators.len(),
609            weights.len(),
610            "all generators must have weights"
611        );
612        let dist = WeightedIndex::new(&weights).expect("a weighted distribution of generators");
613
614        match generators[dist.sample(&mut rng)] {
615            "simple" => {
616                let index: usize = rng.gen_range(0..simple_instructions.len());
617                let instruction = simple_instructions[index];
618                vec![instruction.to_string()]
619            }
620
621            "push" => {
622                let max: i128 = BFieldElement::MAX as i128;
623                let arg: i128 = rng.gen_range(-max..max);
624                vec!["push".to_string(), format!("{arg}")]
625            }
626
627            "dup" => {
628                let arg: usize = rng.gen_range(0..15);
629                vec!["dup".to_string(), format!("{arg}")]
630            }
631
632            "swap" => {
633                let arg: usize = rng.gen_range(1..15);
634                vec!["swap".to_string(), format!("{arg}")]
635            }
636
637            "skiz" => {
638                let mut target: Vec<String> = instruction_gen(labels);
639                target.insert(0, "skiz".to_string());
640                target
641            }
642
643            "call" => {
644                let some_label: String = new_label_gen(labels);
645                vec!["call".to_string(), some_label]
646            }
647
648            unknown => panic!("Unknown generator, {unknown}"),
649        }
650    }
651
652    // FIXME: Apply shrinking.
653    #[allow(unstable_name_collisions)]
654    // reason = "Switch to standard library intersperse_with() when it's ported"
655    pub fn program_gen(size: usize) -> String {
656        // Generate random program
657        let mut labels = vec![];
658        let mut program: Vec<Vec<String>> =
659            (0..size).map(|_| instruction_gen(&mut labels)).collect();
660
661        // Embed all used labels randomly
662        for label in labels.into_iter().sorted().dedup() {
663            program.push(vec![format!("{label}:")]);
664        }
665        program.shuffle(&mut rand::thread_rng());
666
667        program
668            .into_iter()
669            .flatten()
670            .intersperse_with(|| whitespace_gen(5))
671            .collect()
672    }
673
674    #[test]
675    fn parse_program_empty_test() {
676        parse_program_prop(TestCase {
677            input: "",
678            expected: Program::new(&[]),
679            message: "empty string should parse as empty program",
680        });
681
682        parse_program_prop(TestCase {
683            input: "   ",
684            expected: Program::new(&[]),
685            message: "spaces should parse as empty program",
686        });
687
688        parse_program_prop(TestCase {
689            input: "\n",
690            expected: Program::new(&[]),
691            message: "linebreaks should parse as empty program (1)",
692        });
693
694        parse_program_prop(TestCase {
695            input: "   \n ",
696            expected: Program::new(&[]),
697            message: "linebreaks should parse as empty program (2)",
698        });
699
700        parse_program_prop(TestCase {
701            input: "   \n \n",
702            expected: Program::new(&[]),
703            message: "linebreaks should parse as empty program (3)",
704        });
705
706        parse_program_prop(TestCase {
707            input: "// empty program",
708            expected: Program::new(&[]),
709            message: "single comment should parse as empty program",
710        });
711
712        parse_program_prop(TestCase {
713            input: "// empty program\n",
714            expected: Program::new(&[]),
715            message: "single comment with linebreak should parse as empty program",
716        });
717
718        parse_program_prop(TestCase {
719            input: "// multi-line\n// comment",
720            expected: Program::new(&[]),
721            message: "multiple comments should parse as empty program",
722        });
723
724        parse_program_prop(TestCase {
725            input: "// multi-line\n// comment\n ",
726            expected: Program::new(&[]),
727            message: "multiple comments with trailing whitespace should parse as empty program",
728        });
729
730        for size in 0..10 {
731            let input = whitespace_gen(size);
732            parse_program_prop(TestCase {
733                input: &input,
734                expected: Program::new(&[]),
735                message: "arbitrary whitespace should parse as empty program",
736            });
737        }
738    }
739
740    #[test]
741    fn parse_program_whitespace_test() {
742        parse_program_neg_prop(NegativeTestCase {
743            input: "poppop",
744            expected_error: "n/a",
745            expected_error_count: 0,
746            message: "whitespace required between instructions (pop)",
747        });
748
749        parse_program_neg_prop(NegativeTestCase {
750            input: "dup 0dup 0",
751            expected_error: "n/a",
752            expected_error_count: 0,
753            message: "whitespace required between instructions (dup 0)",
754        });
755
756        parse_program_neg_prop(NegativeTestCase {
757            input: "swap 10swap 10",
758            expected_error: "n/a",
759            expected_error_count: 0,
760            message: "whitespace required between instructions (swap 10)",
761        });
762
763        parse_program_neg_prop(NegativeTestCase {
764            input: "push10",
765            expected_error: "n/a",
766            expected_error_count: 0,
767            message: "push requires whitespace before its constant",
768        });
769
770        parse_program_neg_prop(NegativeTestCase {
771            input: "push 10pop",
772            expected_error: "n/a",
773            expected_error_count: 0,
774            message: "push requires whitespace after its constant",
775        });
776
777        parse_program_neg_prop(NegativeTestCase {
778            input: "hello: callhello",
779            expected_error: "n/a",
780            expected_error_count: 0,
781            message: "call requires whitespace before its label",
782        });
783
784        parse_program_neg_prop(NegativeTestCase {
785            input: "hello: popcall hello",
786            expected_error: "n/a",
787            expected_error_count: 0,
788            message: "required space between pop and call",
789        });
790    }
791
792    #[test]
793    fn parse_program_label_test() {
794        parse_program_prop(TestCase {
795            input: "foo: call foo",
796            expected: Program::new(&[
797                Label("foo".to_string()),
798                Instruction(Call("foo".to_string())),
799            ]),
800            message: "parse labels and calls to labels",
801        });
802
803        parse_program_prop(TestCase {
804            input: "foo:call foo",
805            expected: Program::new(&[
806                Label("foo".to_string()),
807                Instruction(Call("foo".to_string())),
808            ]),
809            message: "whitespace is not required after 'label:'",
810        });
811
812        // FIXME: Increase coverage of negative tests for duplicate labels.
813        parse_program_neg_prop(NegativeTestCase {
814            input: "foo: pop foo: pop call foo",
815            expected_error: "duplicate label",
816            expected_error_count: 2,
817            message: "labels cannot occur twice",
818        });
819
820        // FIXME: Increase coverage of negative tests for missing labels.
821        parse_program_neg_prop(NegativeTestCase {
822            input: "foo: pop call herp call derp",
823            expected_error: "missing label",
824            expected_error_count: 2,
825            message: "non-existent labels cannot be called",
826        });
827
828        // FIXME: Increase coverage of negative tests for label/keyword overlap.
829        parse_program_neg_prop(NegativeTestCase {
830            input: "pop: call pop",
831            expected_error: "label cannot be named after instruction",
832            expected_error_count: 1,
833            message: "label names may not overlap with instruction names",
834        });
835
836        parse_program_prop(TestCase {
837            input: "pops: call pops",
838            expected: Program::new(&[
839                Label("pops".to_string()),
840                Instruction(Call("pops".to_string())),
841            ]),
842            message: "labels that share a common prefix with instruction are labels",
843        });
844
845        parse_program_prop(TestCase {
846            input: "_call: call _call",
847            expected: Program::new(&[
848                Label("_call".to_string()),
849                Instruction(Call("_call".to_string())),
850            ]),
851            message: "labels that share a common suffix with instruction are labels",
852        });
853    }
854
855    #[test]
856    fn parse_program_nonexistent_instructions_test() {
857        parse_program_neg_prop(NegativeTestCase {
858            input: "swap 0",
859            expected_error: "instruction `swap` cannot take argument `0`",
860            expected_error_count: 1,
861            message: "instruction `swap` cannot take argument `0`",
862        });
863
864        parse_program_neg_prop(NegativeTestCase {
865            input: "swap 16",
866            expected_error: "expecting label, instruction or eof",
867            expected_error_count: 1,
868            message: "there is no swap 16 instruction",
869        });
870
871        parse_program_neg_prop(NegativeTestCase {
872            input: "dup 16",
873            expected_error: "expecting label, instruction or eof",
874            expected_error_count: 1,
875            message: "there is no dup 16 instruction",
876        });
877    }
878
879    #[test]
880    fn parse_program_bracket_syntax_test() {
881        parse_program_prop(TestCase {
882            input: "foo: [foo]",
883            expected: Program::new(&[
884                Label("foo".to_string()),
885                Instruction(Call("foo".to_string())),
886            ]),
887            message: "Handle brackets as call syntax sugar",
888        });
889
890        parse_program_neg_prop(NegativeTestCase {
891            input: "foo: [bar]",
892            expected_error: "missing label",
893            expected_error_count: 1,
894            message: "Handle missing labels with bracket syntax",
895        })
896    }
897
898    #[test]
899    fn parse_program_test() {
900        for size in 0..100 {
901            let code = program_gen(size * 10);
902
903            let new_actual = parse(&code).map_err(|err| err.to_string());
904
905            if new_actual.is_err() {
906                println!("The code:\n{code}\n\n");
907                panic!("{}", new_actual.unwrap_err());
908            }
909        }
910    }
911
912    #[test]
913    fn parse_program_label_must_start_with_alphabetic_character_or_underscore() {
914        parse_program_neg_prop(NegativeTestCase {
915            input: "1foo: call 1foo",
916            expected_error: "expecting label, instruction or eof",
917            expected_error_count: 1,
918            message: "labels cannot start with a digit",
919        });
920
921        parse_program_neg_prop(NegativeTestCase {
922            input: "-foo: call -foo",
923            expected_error: "expecting label, instruction or eof",
924            expected_error_count: 1,
925            message: "labels cannot start with a dash",
926        });
927
928        parse_program_prop(TestCase {
929            input: "_foo: call _foo",
930            expected: Program::new(&[
931                Label("_foo".to_string()),
932                Instruction(Call("_foo".to_string())),
933            ]),
934            message: "labels can start with an underscore",
935        });
936    }
937}