bfc_rs/
parser.rs

1#[cfg(test)]
2mod tests;
3
4/// Compile-time optimization module.
5pub mod cto;
6
7use super::{BrainfuckInstr, SyntaxError};
/// Struct responsible for parsing Brainfuck.
///
/// Its fields hold the bracket-tracking state of the most recent parse.
/// The derived [`Default`] value is `(0, 0)` for the position and `0` for the balance,
/// the same values `Parser::new()` fills in.
#[derive(Debug, Default)]
pub struct Parser {
    /// The 1-indexed position (line number, position within line) of the earliest `[` without a matching `]`.
    ///
    /// Because real positions start at 1, `(0, 0)` can only mean that no `[` has been recorded.
    earliest_unclosed: (usize, usize),
    /// The number that keeps track of whether or not the brackets are balanced so far.
    ///
    /// Opening a while loop with `[` bumps this number up by 1. Closing one with `]` drops it by 1.
    /// A balanced program ends at exactly 0.
    ///
    /// The type is signed so that a surplus `]` can push the value below zero, which is how
    /// the parser notices it. Brackets could in theory make up the whole program, and `isize`
    /// trades half of `usize`'s magnitude for the ability to go negative.
    bracket_balance: isize,
}
18impl Parser {
19    // Now that our struct isn't empty, we should make a constructor for it. Structs can be declared directly ad hoc, but only if the current module can see all of the struct's fields.
20    // Rust has no language-level concept of a constructor; we just put a struct declaration into an ordinary function. Convention dictates we name it "new".
21    pub fn new() -> Self {
22        // "Self" is just a way to tell the compiler "put this data type's actual name here".
23        // We could write "fn new() -> Parser", but this is neater and would require less searching-and-replacing if we wanted to manually rename `Parser`.
24        Self {
25            earliest_unclosed: (0, 0),
26            bracket_balance: 0
27        }
28    }
29    pub fn parse(&mut self, code: &str) -> Result<Vec<BrainfuckInstr>, SyntaxError> {
30        use BrainfuckInstr::*;
31        self.earliest_unclosed = (0, 0);
32        self.bracket_balance = 0; // zero out the parser's state just in case
33        let mut output = Vec::new();
34        for (line_number, line) in code.lines().enumerate() {
35            for (ch_number, ch) in line.chars().enumerate() {
36                output.push(match ch {
37                    '<' => PointerDec,
38                    '>' => PointerInc,
39                    '-' => DataDec,
40                    '+' => DataInc,
41                    ',' => GetByte,
42                    '.' => PutByte,
43                    '[' => {
44                        // if we're not within (hopefully) a pair of braces already:
45                        if self.bracket_balance == 0 {
46                            self.earliest_unclosed = (line_number + 1, ch_number + 1);
47                        }
48                        self.bracket_balance += 1;
49                        WhileNonzero // in Rust, code blocks are expressions that evaluate to the last expression within them
50                        // this is why we rarely have to write "return" in functions
51                    },
52                    ']' => {
53                        self.bracket_balance -= 1;
54                        if self.bracket_balance < 0 {
55                            // The moment we have one more ] than there have been [s, it no longer makes sense to parse the rest of the program.
56                            // This is one of those situations where "return" is useful:
57                            return Err(SyntaxError::PrematureEndWhile(line_number + 1, ch_number + 1))
58                        }
59                        EndWhile
60                    },
61                    _ => {
62                        continue // skip this iteration if the character is something else
63                    }
64                });
65            }
66        }
67        if self.bracket_balance == 0 {
68            Ok(output)
69        } else {
70            // We've already returned the appropriate error if there was an extra ], so this is the only remaining possibility.
71            Err(SyntaxError::UnclosedWhile(self.earliest_unclosed.0, self.earliest_unclosed.1))
72        }
73    }
74}