bfc_rs/parser.rs
1#[cfg(test)]
2mod tests;
3
4/// Compile-time optimization module.
5pub mod cto;
6
7use super::{BrainfuckInstr, SyntaxError};
/// Struct responsible for parsing Brainfuck.
///
/// It carries the bookkeeping needed to verify that every `[` has a matching `]`.
/// `Parser::default()` yields the same zeroed state as `Parser::new()`.
#[derive(Debug, Default)]
pub struct Parser {
    /// The 1-indexed position (line number, position within line) of the earliest `[` without a matching `]`.
    earliest_unclosed: (usize, usize),
    /// The number that keeps track of whether or not the brackets are balanced so far.
    /// Opening a while loop with `[` bumps this number up by 1. Closing one with `]` drops it by 1.
    //
    // Why `isize`: in theory a program could consist of nothing but brackets, half of them
    // opening and half of them closing. A signed type holds about half the magnitude of
    // `usize` in either direction, which is exactly the range needed here.
    // A well-formed program leaves this value at 0.
    bracket_balance: isize,
}
18impl Parser {
19 // Now that our struct isn't empty, we should make a constructor for it. Structs can be declared directly ad hoc, but only if the current module can see all of the struct's fields.
20 // Rust has no language-level concept of a constructor; we just put a struct declaration into an ordinary function. Convention dictates we name it "new".
21 pub fn new() -> Self {
22 // "Self" is just a way to tell the compiler "put this data type's actual name here".
23 // We could write "fn new() -> Parser", but this is neater and would require less searching-and-replacing if we wanted to manually rename `Parser`.
24 Self {
25 earliest_unclosed: (0, 0),
26 bracket_balance: 0
27 }
28 }
29 pub fn parse(&mut self, code: &str) -> Result<Vec<BrainfuckInstr>, SyntaxError> {
30 use BrainfuckInstr::*;
31 self.earliest_unclosed = (0, 0);
32 self.bracket_balance = 0; // zero out the parser's state just in case
33 let mut output = Vec::new();
34 for (line_number, line) in code.lines().enumerate() {
35 for (ch_number, ch) in line.chars().enumerate() {
36 output.push(match ch {
37 '<' => PointerDec,
38 '>' => PointerInc,
39 '-' => DataDec,
40 '+' => DataInc,
41 ',' => GetByte,
42 '.' => PutByte,
43 '[' => {
44 // if we're not within (hopefully) a pair of braces already:
45 if self.bracket_balance == 0 {
46 self.earliest_unclosed = (line_number + 1, ch_number + 1);
47 }
48 self.bracket_balance += 1;
49 WhileNonzero // in Rust, code blocks are expressions that evaluate to the last expression within them
50 // this is why we rarely have to write "return" in functions
51 },
52 ']' => {
53 self.bracket_balance -= 1;
54 if self.bracket_balance < 0 {
55 // The moment we have one more ] than there have been [s, it no longer makes sense to parse the rest of the program.
56 // This is one of those situations where "return" is useful:
57 return Err(SyntaxError::PrematureEndWhile(line_number + 1, ch_number + 1))
58 }
59 EndWhile
60 },
61 _ => {
62 continue // skip this iteration if the character is something else
63 }
64 });
65 }
66 }
67 if self.bracket_balance == 0 {
68 Ok(output)
69 } else {
70 // We've already returned the appropriate error if there was an extra ], so this is the only remaining possibility.
71 Err(SyntaxError::UnclosedWhile(self.earliest_unclosed.0, self.earliest_unclosed.1))
72 }
73 }
74}