mlem_asm/parse/mod.rs
1use mlem::{Instruction, Program};
2use super::lex;
3mod address;
4use self::address::parse_address;
5mod instruction;
6use self::instruction::{InstructionName, parse_instruction};
7#[cfg(test)]
8mod test;
9
10/// Parse a line of the form `instruction [operand1] [operand2] [operand3][;[comment text]]`
11///
12/// The return value is a `Result<Option<Instruction>, String>`. An `Ok(Some(_))` value means a valid
13/// instruction (for instance, the line `move R:R0 R:R1`). An `Err(_)` value means that there is
14/// unparsable about the line (like `move R:R0 R:r1 garbage garbage`); an `Ok(None)` value means that
15/// the line was legal but meant nothing (like `; comment only`).
16/// # Examples
17/// Simple single-line parsing:
18///
19/// ```
20/// use mlem_asm::Instruction;
21/// use mlem_asm::parse::parse_line;
22/// assert!(parse_line("noop") == parse_line("noop;"));
23/// assert!(parse_line("noop") == Ok(Some(Instruction::NoOp)));
24/// assert!(parse_line("") == Ok(None));
25/// ```
26pub fn parse_line(line: &str) -> Result<Option<Instruction>, String> {
27 // Split into "words"
28 let pieces: Vec<_> = lex::lex_line(line);
29
30 // If there are no words, this line is useless.
31 if pieces.len() == 0 { return Ok(None); }
32
33 let mut instruction_name = InstructionName::None;
34 let mut arg1 = None;
35 let mut arg2 = None;
36 let mut arg3 = None;
37 // Parse the name of the instruction.
38 if pieces.len() >= 1 {
39 match parse_instruction(pieces[0]) {
40 Ok(v) => { instruction_name = v; },
41 Err(e) => { return Err(e); }
42 };
43 }
44
45 // A single piece means a no-arg instruction; it can go straight to an Instruction.
46 if pieces.len() == 1 {
47 return match instruction_name {
48 InstructionName::NoOp => Ok(Some(Instruction::NoOp)),
49 InstructionName::Halt => Ok(Some(Instruction::Halt)),
50 InstructionName::Illegal => Ok(Some(Instruction::Illegal)),
51 _ => Err("Wrong number of arguments. Got 0.".into())
52 };
53 }
54
55 // More than one word means the args need parsed.
56 if pieces.len() >= 2 {
57 match parse_address(pieces[1].trim()) {
58 Ok(v) => { arg1 = Some(v); },
59 Err(e) => { return Err(e); }
60 };
61 }
62 if pieces.len() >= 3 {
63 match parse_address(pieces[2].trim()) {
64 Ok(v) => { arg2 = Some(v); },
65 Err(e) => { return Err(e); }
66 };
67 }
68 if pieces.len() >= 4 {
69 match parse_address(pieces[3].trim()) {
70 Ok(v) => { arg3 = Some(v); },
71 Err(e) => { return Err(e); }
72 };
73 }
74
75 // Single argument instruction
76 if pieces.len() == 2 {
77 // Alias arg1 to its inner value, which DEFINITELY exists at this point.
78 let arg1 = arg1.unwrap();
79 return match instruction_name {
80 InstructionName::Zero => Ok(Some(Instruction::Zero(arg1))),
81 InstructionName::Input => Ok(Some(Instruction::Input(arg1))),
82 InstructionName::Output => Ok(Some(Instruction::Output(arg1))),
83 InstructionName::Jump => Ok(Some(Instruction::Jump(arg1))),
84 InstructionName::Push => Ok(Some(Instruction::Push(arg1))),
85 InstructionName::Pop => Ok(Some(Instruction::Pop(arg1))),
86 _ => Err("Wrong number of arguments. Got 1.".into())
87 };
88 }
89
90 // Two argument instructions
91 if pieces.len() == 3 {
92 // Alias the arguments known to exist
93 let arg1 = arg1.unwrap();
94 let arg2 = arg2.unwrap();
95 return match instruction_name {
96 InstructionName::Move => Ok(Some(Instruction::Move(arg1, arg2))),
97 InstructionName::Add => Ok(Some(Instruction::Add(arg1, arg2))),
98 InstructionName::Sub => Ok(Some(Instruction::Sub(arg1, arg2))),
99 InstructionName::JumpIfZero => Ok(Some(Instruction::JumpIfZero(arg1, arg2))),
100 InstructionName::JumpNotZero => Ok(Some(Instruction::JumpNotZero(arg1, arg2))),
101 _ => Err("Wrong number of arguments. Got 2.".into())
102 }
103 }
104
105 Err("Malformed. Perhaps there are too many terms?".into())
106}
107
108/// Simply parse a program, each line resulting in either a valid or invalid line (Ok or Err).
109/// This function can't fail; however, there's no guarantee that even one valid instruction is produced.
110fn initial_parse_program(program: &str) -> Vec<Result<Option<Instruction>, String>> {
111 let lines = program.lines();
112 let mut v = Vec::new();
113 for line in lines {
114 match parse_line(line) {
115 Ok(i) => { v.push(Ok(i)); }
116 Err(e) => { v.push(Err(e)); }
117 }
118 }
119 v
120}
121
122/// Parse an entire program, returning either a ready-to-execute MLeM program or
123/// a Vec of error messages, with line numbers, of all errors in the program.
124/// # Example
125/// A valid program:
126///
127/// ```
128/// use mlem_asm::*;
129/// let valid_program = "
130/// noop
131/// move R:R0 R:SP;
132/// input R:R0;
133/// ; comment only
134///
135/// ";
136/// let expected_program = Ok(vec![
137/// Instruction::NoOp,
138/// Instruction::Move(Address::RegAbs(Register::R0), Address::RegAbs(Register::SP)),
139/// Instruction::Input(Address::RegAbs(Register::R0))
140/// ]);
141/// let program = parse_program(valid_program);
142/// assert!(program == expected_program, "Program resulted in: {:?} not: {:?}", program, expected_program);
143/// ```
144///
145/// An invalid program:
146///
147/// ```
148/// use mlem_asm::*;
149/// let invalid_program = "
150/// noop
151/// move R:R0 R:xx;
152/// output invalid;
153/// ; comment only
154///
155/// ";
156/// let expected_errors = Err(vec![(2, "Unknown register name: xx".into()), (3, "Malformed address.".into())]);
157/// let errors = parse_program(invalid_program);
158/// assert!(errors == expected_errors, "Program resulted in: {:?} not: {:?}", errors, expected_errors);
159/// ```
160pub fn parse_program(program: &str) -> Result<Program, Vec<(u64, String)>> {
161 let mut p = Vec::new();
162 let mut errors = Vec::new();
163 for (n, line) in initial_parse_program(program).into_iter().enumerate() {
164 match line {
165 Ok(v) => {
166 if let Some(i) = v { p.push(i) };
167 },
168 Err(e) => {
169 errors.push((n as u64, format!("{}", e)));
170 }
171 };
172 }
173 if errors.len() == 0 {
174 // No errors!
175 Ok(p)
176 } else {
177 Err(errors)
178 }
179}