binter/
lib.rs

1//! This module exports brainfuck machine and interpreter implementations.
2#![warn(missing_docs)]
3use std::cmp::Ordering;
4use std::fmt;
5use std::fs::File;
6use std::io::{self, BufRead, BufReader, Error, ErrorKind, Read, Result, Write};
7use std::os::unix::io::AsRawFd;
8use std::path::Path;
9
10#[cfg(test)]
11mod tests;
12
13#[derive(Copy, Clone, PartialEq, Debug)]
14enum Token {
15    // post-lexing, pre-optimization tokens
16    Increment,
17    Decrement,
18    ShiftLeft,
19    ShiftRight,
20    StartLoop,
21    EndLoop,
22    // io tokens
23    PutChar,
24    ReadChar,
25}
26
27#[derive(Clone, PartialEq, Debug)]
28enum Statement {
29    MoveLeft(usize),
30    MoveRight(usize),
31
32    Add(u8),
33
34    Loop(Box<Vec<Statement>>),
35    PutChar,
36    ReadChar,
37}
38
39impl Statement {
40    fn is_equal_type(&self, other: &Self) -> bool {
41        std::mem::discriminant(self) == std::mem::discriminant(other)
42    }
43    fn is_move(&self) -> bool {
44        matches!(self, &(Statement::MoveLeft(_) | Statement::MoveRight(_)))
45    }
46    fn new_loop(statements: Vec<Statement>) -> Self {
47        Self::Loop(Box::new(statements))
48    }
49}
50
51/// This struct is used as an implementation of a brainfuck-compatible
52/// Turing-like machine that supports basic operations needed for such
53/// compilations. This machine works under an assumption that chars can be
54/// converted into [`u8`] freely through ASCII decoding and encoding.
55pub struct BrainfuckMachine {
56    /// Size of the tape vector.
57    size: usize,
58    /// Current cell index.
59    index: usize,
60    /// Tape vector.
61    tape: Vec<u8>,
62}
63
64impl BrainfuckMachine {
65    /// Creates a `BrainfuckMachine` instance of given tape size.
66    pub fn new(size: usize) -> Self {
67        let mut result = Self {
68            size,
69            index: 0,
70            tape: Vec::new(),
71        };
72        result.tape.resize(size, 0);
73        result
74    }
75
76    /// Moves the header left by a given amount. Panics when the index is out
77    /// of bounds.
78    pub fn move_left(&mut self, shift: usize) {
79        match shift.cmp(&(self.index)) {
80            Ordering::Greater => panic!(
81                "Index out of bounds.
82Index before move: {}.
83Left shift value: {}.
84",
85                self.index, shift,
86            ),
87            _ => self.index -= shift,
88        }
89    }
90    /// Moves the header right by a given amount. Panics when the index is out
91    /// of bounds.
92    pub fn move_right(&mut self, shift: usize) {
93        match shift.cmp(&(self.size - self.index)) {
94            Ordering::Greater => panic!(
95                "Index out of bounds.
96Index before move: {}.
97Right shift value: {}.
98Max possible index: {}.
99",
100                self.index,
101                shift,
102                self.size - 1
103            ),
104            _ => self.index += shift,
105        }
106    }
107
108    /// Adds a given value to the current cell, with wrapping.
109    pub fn add(&mut self, value: u8) {
110        let current = self.tape[self.index];
111        self.tape[self.index] = current.wrapping_add(value);
112    }
113
114    /// Substracts a given value to the current cell, with wrapping.
115    pub fn substract(&mut self, value: u8) {
116        let current = self.tape[self.index];
117        self.tape[self.index] = current.wrapping_sub(value);
118    }
119
120    /// Inserts a given char's ASCII value into the current cell.
121    pub fn read_char(&mut self, input: char) {
122        self.tape[self.index] = input as u8
123    }
124
125    /// Returns the current cell's value ASCII encoded into a char.
126    pub fn put_char(&self) -> char {
127        self.tape[self.index] as char
128    }
129
130    /// Returns `true` if the current cell's value is non-zero.
131    pub fn check_loop(&self) -> bool {
132        self.tape[self.index] != 0
133    }
134
135    /// Returns a copy of the vector representing the tape.
136    fn get_tape(&self) -> Vec<u8> {
137        self.tape.clone()
138    }
139}
140
141// Brainfuck grammar:
142// code := (stmt_block)*
143//
144// stmt_block := stmt | loop
145//
146// loop := '[' stmt_block+ ']'
147//
148// stmt := '+' | '-' | '<' | '>' | ',' | '.'
149struct Lexer<T: BufRead> {
150    reader: T,
151}
152
153impl<T: BufRead> Lexer<T> {
154    fn next_token(&mut self) -> Option<Token> {
155        let mut buf: [u8; 1] = [0];
156        match self.reader.read(&mut buf) {
157            Err(msg) => {
158                panic!("Error when reading a token: {}", msg);
159            }
160            Ok(0) => None,
161            Ok(_) => {
162                let ascii = buf[0];
163                let to_token = ascii as char;
164                Self::tokenize(&to_token)
165            }
166        }
167    }
168    fn eof(&mut self) -> bool {
169        match self.reader.fill_buf() {
170            Ok(buf) => buf.is_empty(),
171            Err(msg) => {
172                panic!("EOF check failed: {}", msg);
173            }
174        }
175    }
176    fn tokenize(input: &char) -> Option<Token> {
177        use crate::Token::*;
178
179        match input {
180            '+' => Some(Increment),
181            '-' => Some(Decrement),
182            '<' => Some(ShiftLeft),
183            '>' => Some(ShiftRight),
184            ',' => Some(ReadChar),
185            '.' => Some(PutChar),
186            '[' => Some(StartLoop),
187            ']' => Some(EndLoop),
188            _ => None,
189        }
190    }
191    fn iter(&mut self) -> LexerRefIter<'_, T> {
192        LexerRefIter { lexer: self }
193    }
194}
195
196struct LexerIter<T: BufRead> {
197    lexer: Lexer<T>,
198}
199
200impl<T: BufRead> Iterator for LexerIter<T> {
201    type Item = Option<Token>;
202    fn next(&mut self) -> Option<Self::Item> {
203        match self.lexer.eof() {
204            true => None,
205            false => Some(self.lexer.next_token()),
206        }
207    }
208}
209
210impl<T: BufRead> IntoIterator for Lexer<T> {
211    type Item = Option<Token>;
212    type IntoIter = LexerIter<T>;
213    fn into_iter(self) -> Self::IntoIter {
214        LexerIter { lexer: self }
215    }
216}
217
218struct LexerRefIter<'a, T: BufRead> {
219    lexer: &'a mut Lexer<T>,
220}
221
222impl<'a, T: BufRead> Iterator for LexerRefIter<'a, T> {
223    type Item = Option<Token>;
224    fn next(&mut self) -> Option<Self::Item> {
225        match self.lexer.eof() {
226            true => None,
227            false => Some(self.lexer.next_token()),
228        }
229    }
230}
231
232impl<'a, T: BufRead> IntoIterator for &'a mut Lexer<T> {
233    type Item = Option<Token>;
234    type IntoIter = LexerRefIter<'a, T>;
235    fn into_iter(self) -> Self::IntoIter {
236        LexerRefIter { lexer: self }
237    }
238}
239struct Parser<T: BufRead> {
240    lexer: Lexer<T>,
241}
242
243impl<T: BufRead> Parser<T> {
244    fn from_lexer(lexer: Lexer<T>) -> Self {
245        Self { lexer }
246    }
247    fn from_reader(reader: T) -> Self {
248        Self::from_lexer(Lexer { reader })
249    }
250    fn parse_rec(
251        lexer_iter: &mut LexerRefIter<T>,
252        is_loop: bool,
253    ) -> Result<Option<Vec<Statement>>> {
254        let mut result: Vec<Statement> = Vec::new();
255        while let Some(opt_token) = lexer_iter.next() {
256            match opt_token {
257                Some(token) => match token {
258                    Token::Increment => result.push(Statement::Add(1)),
259                    Token::Decrement => result.push(Statement::Add(u8::MAX)),
260                    Token::ShiftLeft => result.push(Statement::MoveLeft(1)),
261                    Token::ShiftRight => result.push(Statement::MoveRight(1)),
262                    Token::PutChar => result.push(Statement::PutChar),
263                    Token::ReadChar => result.push(Statement::ReadChar),
264                    Token::StartLoop => {
265                        let opt_loop = Self::parse_rec(lexer_iter, true)?;
266                        if let Some(stmt_loop) = opt_loop {
267                            result.push(Statement::new_loop(stmt_loop));
268                        }
269                    }
270                    Token::EndLoop => {
271                        if is_loop {
272                            if result.is_empty() {
273                                return Ok(None);
274                            } else {
275                                return Ok(Some(result));
276                            }
277                        } else {
278                            return Err(Error::new(
279                                ErrorKind::InvalidData,
280                                "Error: ']' found with no matching '['.".to_string(),
281                            ));
282                        }
283                    }
284                },
285                None => {}
286            }
287        }
288        if is_loop {
289            Err(Error::new(
290                ErrorKind::InvalidData,
291                "Error: '[' found with no matching ']'.".to_string(),
292            ))
293        } else {
294            Ok(Some(result))
295        }
296    }
297
298    fn parse(&mut self) -> Result<Vec<Statement>> {
299        let lexer_iter: &mut LexerRefIter<T> = &mut self.lexer.iter();
300        let parsed_opt = Self::parse_rec(lexer_iter, false)?;
301        Ok(parsed_opt.unwrap_or_default())
302    }
303}
304
305struct Optimizer {
306    statements: Vec<Statement>,
307}
308
309impl Optimizer {
310    fn new(statements: Vec<Statement>) -> Self {
311        Self { statements }
312    }
313
314    fn generate_optimized_stmt(stmt_type: &Statement, value: &mut usize) -> Option<Statement> {
315        let result = match value {
316            0 => None,
317            _ => match stmt_type {
318                Statement::Add(_) => Some(Statement::Add(*value as u8)),
319                Statement::MoveLeft(_) => Some(Statement::MoveLeft(*value)),
320                Statement::MoveRight(_) => Some(Statement::MoveRight(*value)),
321                _ => None,
322            },
323        };
324        *value = 0;
325        result
326    }
327
328    fn optimize_rec(statements: &Vec<Statement>) -> Option<Vec<Statement>> {
329        let mut result: Vec<Statement> = Vec::new();
330        let mut stmt_count: usize = 0;
331        let mut last_statement = Statement::ReadChar;
332
333        for statement in statements {
334            if !statement.is_equal_type(&last_statement)
335                && (!statement.is_move() || !last_statement.is_move())
336            {
337                match Self::generate_optimized_stmt(&last_statement, &mut stmt_count) {
338                    Some(statement) => result.push(statement),
339                    None => {}
340                }
341            }
342            let mut cloned = statement.clone();
343            match statement {
344                Statement::MoveLeft(value) => match last_statement {
345                    Statement::MoveLeft(_) => {
346                        stmt_count += value;
347                    }
348                    Statement::MoveRight(_) => {
349                        if stmt_count < *value {
350                            stmt_count = value - stmt_count;
351                        } else {
352                            stmt_count -= value;
353                            cloned = last_statement.clone();
354                        }
355                    }
356                    _ => {
357                        stmt_count = *value;
358                    }
359                },
360                Statement::MoveRight(value) => match last_statement {
361                    Statement::MoveRight(_) => {
362                        stmt_count += value;
363                    }
364                    Statement::MoveLeft(_) => {
365                        if stmt_count < *value {
366                            stmt_count = value - stmt_count;
367                        } else {
368                            stmt_count -= value;
369                            cloned = last_statement.clone();
370                        }
371                    }
372                    _ => {
373                        stmt_count = *value;
374                    }
375                },
376                Statement::Add(value) => match last_statement {
377                    Statement::Add(_) => {
378                        stmt_count = value.wrapping_add(stmt_count as u8) as usize;
379                    }
380                    _ => {
381                        stmt_count = *value as usize;
382                    }
383                },
384                stmt @ (Statement::PutChar | Statement::ReadChar) => result.push(stmt.clone()),
385                Statement::Loop(code) => {
386                    if let Some(optimized) = Self::optimize_rec(&code) {
387                        result.push(Statement::new_loop(optimized));
388                    }
389                }
390            }
391            last_statement = cloned;
392        }
393        match Self::generate_optimized_stmt(&last_statement, &mut stmt_count) {
394            Some(statement) => result.push(statement),
395            None => {}
396        }
397        Some(result)
398    }
399
400    fn optimize_once(&mut self) {
401        let opt_result = Self::optimize_rec(&self.statements);
402        self.statements = opt_result.unwrap_or_default();
403    }
404
405    fn optimize(&mut self, max_iterations: u32) {
406        if max_iterations == 0 {
407            loop {
408                let previous = self.statements.clone();
409                self.optimize_once();
410                if self.statements == previous {
411                    break;
412                }
413            }
414        } else {
415            for _ in 0..max_iterations {
416                let previous = self.statements.clone();
417                self.optimize_once();
418                if self.statements == previous {
419                    break;
420                }
421            }
422        }
423    }
424
425    fn yield_back(self) -> Vec<Statement> {
426        self.statements
427    }
428}
429
430/// A brainfuck interpreter class that reads code from a file / [`BufRead`]
431/// instance, parses, optimizes and runs it.
432pub struct Interpreter<T: BufRead> {
433    parser: Parser<T>,
434    machine: BrainfuckMachine,
435    console: termios::Termios,
436}
437
438impl Interpreter<BufReader<File>> {
439    /// Creates a new [`Interpreter<BufReader<File>>`] instance wrapped in a
440    /// [`Result`] object. If there were any problems when reading a file
441    /// the function will return an [`std::io::Error`] instance.
442    pub fn from_file(file_name: &str, machine_size: usize) -> Result<Self> {
443        let path = Path::new(file_name);
444        if !path.is_file() {
445            return Err(Error::new(
446                ErrorKind::InvalidData,
447                format!("Data cannot be read from: {}", file_name),
448            ));
449        }
450        let file = File::open(path)?;
451        let reader: BufReader<File> = BufReader::new(file);
452        Ok(Self {
453            parser: Parser::<BufReader<File>>::from_reader(reader),
454            machine: BrainfuckMachine::new(machine_size),
455            console: termios::Termios::from_fd(0).unwrap(),
456        })
457    }
458}
459
460impl<T: BufRead> Interpreter<T> {
461    /// Creates a new [`Interpreter`] instance from a [`BufRead`] implementor
462    /// with a given tape size.
463    pub fn from_reader(reader: T, machine_size: usize) -> Self {
464        Self {
465            parser: Parser::from_reader(reader),
466            machine: BrainfuckMachine::new(machine_size),
467            console: termios::Termios::from_fd(0).unwrap(),
468        }
469    }
470
471    fn get_char(&mut self) -> char {
472        let stdout = io::stdout();
473        let mut buffer = [0; 1];
474        let mut reader = io::stdin();
475        stdout.lock().flush().unwrap();
476        reader.read_exact(&mut buffer).unwrap();
477        buffer[0] as char
478    }
479
480    fn enable_get_char_mode(&mut self) {
481        let mut new_termios = self.console.clone();
482        new_termios.c_lflag &= !(termios::ICANON);
483        termios::tcsetattr(
484            std::io::Stdin::as_raw_fd(&std::io::stdin()),
485            termios::TCSANOW,
486            &mut new_termios,
487        )
488        .unwrap();
489    }
490
491    fn disable_get_char_mode(&mut self) {
492        termios::tcsetattr(
493            std::io::Stdin::as_raw_fd(&std::io::stdin()),
494            termios::TCSANOW,
495            &self.console,
496        )
497        .unwrap();
498    }
499
500    /// Parses the code that was contained within the [`BufRead`] instance
501    /// passed to the constructor (or within a given file, if the
502    /// [`Interpreter::from_file`] constructor has been
503    /// called) and then runs it. This function returns an [`Ok(())`] instance
504    /// in case of no issues and a wrapped [`std::io::Error`] if there are any.
505    ///
506    /// [`Interpreter::from_file`]: ./struct.Interpreter.html#method.from_file
507    pub fn run(&mut self) -> Result<()> {
508        let statements = self.parser.parse()?;
509        self.run_code(&statements);
510        Ok(())
511    }
512
513    /// Parses the code that was contained within the [`BufRead`] instance
514    /// passed to the constructor (or within a given file, if the
515    /// [`Interpreter::from_file`] constructor has been
516    /// called) and then runs it with a given optimization level. The
517    /// `max_iterations` parameter specifies the maximum amount of optimization
518    /// iterations that will be run on the code. If `max_iterations` is equal
519    /// to `0`, then the code will be optimized fully. This function returns an
520    /// [`Ok(())`] instancein case of no issues and a wrapped
521    /// [`std::io::Error`] if there are any.
522    ///
523    /// [`Interpreter::from_file`]: ./struct.Interpreter.html#method.from_file
524    pub fn run_with_optimization(&mut self, max_iterations: u32) -> Result<()> {
525        let statements = self.parser.parse()?;
526        let mut optimizer = Optimizer::new(statements);
527        optimizer.optimize(max_iterations);
528        let statements = optimizer.yield_back();
529        self.run_code(&statements);
530        Ok(())
531    }
532
533    fn run_code(&mut self, statements: &Vec<Statement>) {
534        self.enable_get_char_mode();
535        for statement in statements {
536            match statement {
537                Statement::MoveLeft(value) => self.machine.move_left(*value),
538                Statement::MoveRight(value) => self.machine.move_right(*value),
539                Statement::Add(value) => self.machine.add(*value),
540                Statement::ReadChar => {
541                    let chr = self.get_char();
542                    self.machine.read_char(chr);
543                }
544                Statement::PutChar => {
545                    let chr = self.machine.put_char();
546                    print!("{}", chr);
547                }
548                Statement::Loop(boxed) => {
549                    while self.machine.check_loop() {
550                        self.run_code(boxed);
551                    }
552                }
553            }
554        }
555        self.disable_get_char_mode();
556    }
557
558    /// Returns a [`Vec<u8>`] instance represeting the tape of the underlying
559    /// [machine].
560    ///
561    /// [machine]: BrainfuckMachine
562    pub fn get_tape(&self) -> Vec<u8> {
563        self.machine.get_tape()
564    }
565}
566
567struct Code<'a> {
568    code: &'a Vec<Statement>,
569}
570impl<'a> Code<'a> {
571    fn generate_string(statements: &Vec<Statement>) -> String {
572        let mut info: String = String::new();
573        for statement in statements {
574            let to_push = match statement {
575                Statement::Add(value) => format!("{}+ ", *value),
576                Statement::MoveLeft(value) => format!("{}< ", *value),
577                Statement::MoveRight(value) => format!("{}> ", *value),
578                Statement::ReadChar => ", ".to_string(),
579                Statement::PutChar => ". ".to_string(),
580                Statement::Loop(boxed) => {
581                    let loop_stmt = boxed;
582                    format!("[ {}] ", Self::generate_string(&loop_stmt))
583                }
584            };
585            info.push_str(&to_push);
586        }
587        info.trim_end().to_string()
588    }
589}
590
591impl<'a> std::fmt::Debug for Code<'a> {
592    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
593        let info: String = Self::generate_string(self.code);
594        f.debug_struct("Code").field("code", &info).finish()
595    }
596}