rflex_lib/
generator.rs

1use std::{fs::File, error::Error};
2use std::io::Write;
3use crate::dfa::{DFA, self};
4
5pub struct Generator<'a> { 
6    dfa: &'a DFA,
7    file: File,
8    tabs: usize,
9}
10
11#[allow(dead_code)]
12impl<'a> Generator<'a> {
13    pub fn new(dfa: &'a DFA, outpath: String) -> Result<Self, Box<dyn Error>> {
14        return Ok(Generator { 
15            dfa,
16            file: File::create(outpath)?,
17            tabs: 0,
18        });
19    }
20    fn write_inline(&mut self, s: &str) -> Result<(), Box<dyn Error>> {
21        write!(self.file, "{}",s)?;
22        return Ok(());
23    }
24    fn write(&mut self, s: &str) -> Result<(), Box<dyn Error>> {
25        write!(self.file, "{}", &format!(
26            "{}{}", 
27            "\t".repeat(self.tabs),
28            s
29        ))?;
30        return Ok(());
31    }
32    fn writeln(&mut self, s: &str) -> Result<(), Box<dyn Error>> {
33        self.write(&format!("{s}\n"))?;
34        return Ok(());
35    }
36    fn write_vec(&mut self, strs: &[&str]) -> Result<(), Box<dyn Error>> {
37        for s in strs { self.writeln(s)?; }
38        return Ok(());
39    }
40    fn indent(&mut self)   { self.tabs += 1; }
41    fn unindent(&mut self) { self.tabs -= 1; }
42
43    pub fn generate(&mut self) -> Result<(), Box<dyn Error>> {
44        self.writeln("use std::fs;")?;
45        self.writeln("use Token::*;")?;
46        self.writeln("#[derive(Debug, PartialEq, Eq)]")?;
47        self.writeln("pub enum Token {")?;
48        self.indent();
49        for label in &self.dfa.labels {
50            if label.len() == 0 { continue; }
51            self.writeln(&format!("{label}(String),"))?;
52        }
53        self.writeln("EOF")?;
54        self.unindent();
55        self.writeln("}")?;
56        self.writeln("#[derive(Debug, PartialEq, Eq)]")?;
57        self.write_vec(&[
58            "pub struct TokenErr {",
59            "   pub error: String",
60            "}"
61        ])?;
62        self.write_vec(&[
63            "pub struct Lexer {",
64            "  chars:   Vec<char>,",
65            "  pos:     usize,",
66            "  begins:  Vec<usize>,",
67            "  tabs:    Vec<usize>,",
68            "  column:  usize,",
69            &format!("  accepts: [usize; {}]", self.dfa.ncount),
70            "}",
71            "impl Lexer {",
72            "    pub fn new(fname: &str) -> Result<Self, Box<dyn std::error::Error>> {",
73            "        let chars = fs::read_to_string(fname)?",
74            "            .chars()",
75            "            .collect();",
76            &self.gen_accepts(),
77            "        return Ok(Lexer { ",
78            "           chars,",
79            "           pos: 0,",
80            "           begins: vec![0; 1],",
81            "           tabs:   Vec::new(),",
82            "           column: 0,",
83            "           accepts",
84            "        });",
85            "    }",
86            "",
87            "   fn advance(&mut self) -> char {",
88            "       let c = self.chars[self.pos];",
89            "        match c {",
90            "           '\\n' => {",
91            "               self.column = 0;",
92            "               self.begins.push(self.pos + 1);",
93            "           },",
94            "           '\\t' => {",
95            "               self.tabs.push(self.column);",
96            "               self.column += 4 - (self.column % 4);",
97            "           }",
98            "           _ => self.column += 1",
99            "       }",
100            "       self.pos += 1;",
101            "       return c;",
102            "   }",
103            "   fn retract(&mut self) {",
104            "       self.pos -= 1;",
105            "       let c = self.chars[self.pos];",
106            "       match c {",
107            "           '\\n' => {",
108            "               self.begins.pop();",
109            "               self.column = self.pos - self.begins[self.begins.len() - 1];",
110            "           }",
111            "           '\\t' => {",
112            "               self.column = self.tabs.pop().unwrap();",
113            "           }",
114            "           _ => self.column -= 1",
115            "       }",
116            "   }",
117        ])?;
118        self.indent();
119        self.writeln("pub fn next(&mut self) -> Result<Token, TokenErr> {")?;
120        self.indent();
121        self.write_automota()?;
122        self.unindent();
123        self.writeln("}")?;
124        self.unindent();
125        self.writeln("}")?;
126        return Ok(());
127    }
128
129    fn write_automota(&mut self) -> Result<(), Box<dyn Error>> {
130        self.write_vec(&[
131            "if self.pos == self.chars.len() { return Ok(EOF); }",
132            "let mut stk: Vec<usize> = Vec::new();",
133            "let mut chars: Vec<char> = Vec::new();",
134            "let mut state: usize = 0;",
135            "loop {",
136        ])?;
137        self.indent();
138        self.writeln("if self.pos == self.chars.len() { break; }")?;
139        self.writeln("let c = self.advance();")?;
140        self.writeln("state = match state {")?;
141        self.indent();
142        for state in 0..self.dfa.ncount {
143            if state == self.dfa.dead {
144                self.write_vec(&[
145                    &format!("{} => {{", self.dfa.dead),
146                    "\tstk.push(state);",
147			        "\tchars.push(c);",
148                    "\tbreak;",
149                    "}"
150                ])?;
151            } else {
152                self.write_transitions(state)?;
153            }
154        }
155        self.writeln("_ => panic!(\"Invalid State!\")")?;
156        self.unindent();
157        self.writeln("};")?;
158        self.writeln("stk.push(state);")?;
159        self.writeln("chars.push(c);")?;
160        self.unindent();
161        self.writeln("}")?;
162        self.write_vec(&[
163            "while stk.len() > 0 &&",
164            "   self.accepts[stk[stk.len() - 1]] == 0 {",
165            "   stk.pop().unwrap();",
166            "   chars.pop().unwrap();",
167            "   self.retract();",
168            "}",
169            "if stk.len() == 0 {",
170            "    let start = self.begins[self.begins.len() - 1];",
171            "    let error_line: String = self.chars[start..]",
172            "        .iter()",
173            "        .take_while(|&&c| c != '\\n')",
174            "        .collect();",
175            "    return Err(TokenErr{error: format!(",
176            "        \"Failed to lex from: \\n{}\\n{}^\",",
177            "        error_line,",
178            "        \" \".repeat(self.column)",
179            "    )});",
180            "}"
181        ])?;
182        self.writeln("let word : String = chars.iter().collect();")?;
183        self.writeln("match self.accepts[stk[stk.len() - 1]] {")?;
184        self.indent();
185        for (idx, label) in self.dfa.labels.iter().enumerate() {
186            if label.len() == 0 { continue; }
187            self.writeln(&format!(
188                "{:<4} => return Ok({}(word)),",
189                idx + 1, self.dfa.labels[idx],
190            ))?;
191        }
192        self.writeln("_    => panic!(\"Invalid Accepting State\")")?;
193        self.unindent();
194        self.writeln("}")?;
195        return Ok(());
196    }
197
198    fn write_transitions(&mut self, state: usize) -> Result<(), Box<dyn Error>> {
199        self.writeln(&format!("{state} => match c {{"))?;
200        self.indent();
201        let mut j = 0;
202        while j < u8::MAX {
203            let nbr = self.dfa.jumps[state][j as usize];
204            // the only self-transitions are from whitespace.
205            if state == 0 && nbr == 0 {
206                self.writeln(&format!("\'{}\' => {},",
207                    escape(j as char),
208                    "continue"
209                ))?;
210                j += 1;
211                continue;
212            }
213            if nbr == dfa::NULL { j += 1; continue; };
214            if self.dfa.dead == nbr { j += 1; continue; }
215            //println!("{} - {}", self.dfa.dead, nbr);
216            let start =  j;
217            while j + 1 < u8::MAX &&
218                self.dfa.jumps[state][(j + 1) as usize] == nbr { 
219                j += 1 
220            }
221            //println!("{}", self.dfa.jumps[state][j as usize]);
222            if start == j {
223                self.writeln(&format!("\'{}\' => {},",
224                    escape(start as char),
225                    self.dfa.jumps[state][j as usize]
226                ))?;
227            } else if start + 1 == j {
228                self.writeln(&format!("\'{}\' | \'{}\' => {},",
229                    escape(start as char), 
230                    escape(j as char),
231                    self.dfa.jumps[state][j as usize]
232                ))?;
233            } else {
234                self.writeln(&format!(
235                    "\'{}\'..=\'{}\' => {},",
236                    escape(start as char),
237                    escape(j as char),
238                    self.dfa.jumps[state][j as usize]
239                ))?;
240            }
241            j += 1;
242        }
243        self.writeln(&format!("_ => {}", self.dfa.dead))?;
244        self.unindent();
245        self.writeln("},")?;
246        return Ok(());
247    }
248
249    fn gen_accepts(&self) -> String {
250        let mut res = "\t\tlet accepts = [\n".to_string();
251        let mut idx = 0;
252        while (self.dfa.ncount-idx) / 5 > 0 { 
253            for _ in 0..4 {
254                res.push_str(&format!("\t\t\t{:>4}, ", self.dfa.accepts[idx]));
255                idx += 1;
256            }
257            res.push_str(&format!("\t\t\t{:>4},\n", self.dfa.accepts[idx]));
258            idx += 1;
259        }
260        if self.dfa.ncount%5 > 0 {
261            for _ in 0..(self.dfa.ncount%5 - 1) {
262                res.push_str(&format!("\t\t\t{:>4}, ", self.dfa.accepts[idx]));
263                idx += 1
264            }
265            res.push_str(&format!("\t\t\t{:>4}\n", self.dfa.accepts[idx]));
266        }
267        res.push_str("\t\t];");
268        return res;
269    }
270}
271
272fn escape(c: char) -> String {
273    match c {
274        '\n' => return "\\n".to_string(),
275        '\t' => return "\\t".to_string(),
276        '\\' => "\\\\".to_string(),
277        '\'' => "\\'".to_string(),
278        '"' => "\\\"".to_string(),
279        '\r' => "\\r".to_string(),
280        _ => return c.to_string()
281    }
282}
283
284/* TODO - Lexing Testcases... */
285#[cfg(test)]
286mod tests {
287    use super::*;
288    use crate::{lexer::Lexer, parser::Parser, nfa::NFA};
289
290    #[allow(dead_code)]
291    fn visualize() {
292        let path = "example2.tk";
293        let lexer = Lexer::new(path).expect("Invalid Path");
294        let mut parser = Parser::new(lexer).expect("File should be non-empty!");
295        let nfa = NFA::build_from_matches(&parser.parse().expect("Invalid parse"));
296        let dfa = DFA::compress(DFA::subset_construction(nfa));
297        let mut gen = Generator::new(&dfa, "tests/tokenizer.rs".to_string())
298            .expect("Just Be Better");
299        gen.generate().expect("WORK");
300    }
301}