//! bfmod/lexer.rs — tokenizer for Brainfuck source.

/// The set of Brainfuck commands a lexed token can represent.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Commands {
    /// `+` — increment the current cell.
    AddByte,
    /// `-` — decrement the current cell.
    RemoveByte,
    /// `[` — begin a loop.
    LoopBegin,
    /// `]` — end a loop.
    LoopEnd,
    /// `>` — move the data pointer right.
    GoNext,
    /// `<` — move the data pointer left.
    GoLast,
    /// `,` — read a byte from input.
    ReadByte,
    /// `.` — write a byte to output.
    WriteByte,
}
13
/// A single lexed Brainfuck command together with its source position.
#[derive(Debug)]
pub struct Token {
    // Which command this token represents.
    pub value: Commands,
    // Line index of the command (starts at 0, advanced on '\n' by the lexer).
    pub line: i32,
    // Column index of the command (starts at 0, reset on '\n').
    pub column: i32,
    // Run length for collapsed commands (`+ - > <`); 0 for `[ ] . ,`.
    pub repeats: i32
}
21
22fn generate_optimization(index: usize, input: &str, ignore_list: &mut Vec<usize>, handle_char: char, give_val: Commands) -> Token {
23    let input_slice = &input[index..input.len()];
24    let mut total_found = 0;
25
26    for (i, char) in input_slice.chars().enumerate() {
27        if char == handle_char {
28            total_found += 1;
29            ignore_list.push(i+index);
30        } else {
31            break;
32        }
33    }
34
35    return Token{
36        value: give_val,
37        repeats: total_found,
38        line: 0,
39        column: 0
40    }
41}
42
43/// Lexerize the input.
44///
45/// # Arguments
46///
47/// * `input` (`&str`) - The brainfuck code will be lexerized.
48///
49/// # Examples
50///
51/// ```
52/// let tokens: Vec<Token> = bfmod::lexer::execute("
53///     >++++++++[<+++++++++>-]<.>++++[<+++++++>-]
54///     <+.+++++++..+++.>>++++++[<+++++++>-]<++.--
55///     ----------.>++++++[<+++++++++>-]<+.<.+++.-
56///     -----.--------.>>>++++[<++++++++>-]<+.");
57/// ```
58pub fn execute(input: &str) -> Vec<Token> {
59    let mut tokenized: Vec<Token> = Vec::new();
60    let mut line = 0;
61    let mut column = 0;
62    let mut ignore_list: Vec<usize> = Vec::new();
63
64    for (i, char) in input.chars().enumerate() {
65        if ignore_list.contains(&i) {
66            continue
67        }
68
69        match char {
70            '\n' => {
71                line += 1;
72                column = 0;
73            },
74            '+' => {
75                let mut optimized_token = generate_optimization(i, input, &mut ignore_list, '+', Commands::AddByte);
76                optimized_token.line = line;
77                optimized_token.column = column;
78
79                tokenized.push(optimized_token)
80            },
81            '-' => {
82                let mut optimized_token = generate_optimization(i, input, &mut ignore_list, '-', Commands::RemoveByte);
83                optimized_token.line = line;
84                optimized_token.column = column;
85
86                tokenized.push(optimized_token)
87            },
88            '[' => tokenized.push(Token{
89                value: Commands::LoopBegin,
90                line: line,
91                column: column,
92                repeats: 0
93            }),
94            ']' => tokenized.push(Token{
95                value: Commands::LoopEnd,
96                line: line,
97                column: column,
98                repeats: 0
99            }),
100            '>' => {
101                let mut optimized_token = generate_optimization(i, input, &mut ignore_list, '>', Commands::GoNext);
102                optimized_token.line = line;
103                optimized_token.column = column;
104
105                tokenized.push(optimized_token)
106            },
107            '<' => {
108                let mut optimized_token = generate_optimization(i, input, &mut ignore_list, '<', Commands::GoLast);
109                optimized_token.line = line;
110                optimized_token.column = column;
111
112                tokenized.push(optimized_token)
113            },
114            '.' => tokenized.push(Token{
115                value: Commands::WriteByte,
116                line: line,
117                column: column,
118                repeats: 0
119            }),
120            ',' => tokenized.push(Token{
121                value: Commands::ReadByte,
122                line: line,
123                column: column,
124                repeats: 0
125            }),
126            _ => {
127                column += 1;
128                continue;
129            }
130        };
131        
132        column += 1;
133    };
134
135    tokenized
136}
137
138/// Check brackets for lexerized input.
139///
140/// # Arguments
141///
142/// * `tokens` (`&Vec<Token>`) - The lexerized code will be checked.
143///
144/// # Examples
145///
146/// ```
147/// match bfmod::lexer::check_brackets(&tokens) {
148///     Ok(()) => {
149///         // ...
150///     },
151///     Err(msg) => println!(msg)
152/// }
153/// ```
154pub fn check_brackets(tokens: &Vec<Token>) -> Result<(), String> {
155    let mut loop_reference_count = 0;
156    let (mut open_brackets_line, mut open_brackets_column) = (0, 0);
157    let (mut close_brackets_line, mut close_brackets_column) = (0, 0);
158
159    for token in tokens {
160        match token.value {
161            Commands::LoopBegin => {
162                loop_reference_count += 1;
163                open_brackets_line = token.line;
164                open_brackets_column = token.column;
165            },
166            Commands::LoopEnd => {
167                loop_reference_count -= 1;
168                close_brackets_line = token.line;
169                close_brackets_column = token.column;
170            },
171            _ => continue
172        };
173    };
174
175    if loop_reference_count > 0 {
176        return Err(format!("un-closed loop at line {}, column {}.", open_brackets_line, open_brackets_column))
177    }
178
179    if loop_reference_count < 0 {
180        return Err(format!("closing the undefined loop at line {}, column {}.", close_brackets_line, close_brackets_column))
181    }
182
183    Ok(())
184}