1use std::{fs::File, error::Error};
2use std::io::Write;
3use crate::dfa::{DFA, self};
4
5pub struct Generator<'a> {
6 dfa: &'a DFA,
7 file: File,
8 tabs: usize,
9}
10
11#[allow(dead_code)]
12impl<'a> Generator<'a> {
13 pub fn new(dfa: &'a DFA, outpath: String) -> Result<Self, Box<dyn Error>> {
14 return Ok(Generator {
15 dfa,
16 file: File::create(outpath)?,
17 tabs: 0,
18 });
19 }
20 fn write_inline(&mut self, s: &str) -> Result<(), Box<dyn Error>> {
21 write!(self.file, "{}",s)?;
22 return Ok(());
23 }
24 fn write(&mut self, s: &str) -> Result<(), Box<dyn Error>> {
25 write!(self.file, "{}", &format!(
26 "{}{}",
27 "\t".repeat(self.tabs),
28 s
29 ))?;
30 return Ok(());
31 }
32 fn writeln(&mut self, s: &str) -> Result<(), Box<dyn Error>> {
33 self.write(&format!("{s}\n"))?;
34 return Ok(());
35 }
36 fn write_vec(&mut self, strs: &[&str]) -> Result<(), Box<dyn Error>> {
37 for s in strs { self.writeln(s)?; }
38 return Ok(());
39 }
40 fn indent(&mut self) { self.tabs += 1; }
41 fn unindent(&mut self) { self.tabs -= 1; }
42
43 pub fn generate(&mut self) -> Result<(), Box<dyn Error>> {
44 self.writeln("use std::fs;")?;
45 self.writeln("use Token::*;")?;
46 self.writeln("#[derive(Debug, PartialEq, Eq)]")?;
47 self.writeln("pub enum Token {")?;
48 self.indent();
49 for label in &self.dfa.labels {
50 if label.len() == 0 { continue; }
51 self.writeln(&format!("{label}(String),"))?;
52 }
53 self.writeln("EOF")?;
54 self.unindent();
55 self.writeln("}")?;
56 self.writeln("#[derive(Debug, PartialEq, Eq)]")?;
57 self.write_vec(&[
58 "pub struct TokenErr {",
59 " pub error: String",
60 "}"
61 ])?;
62 self.write_vec(&[
63 "pub struct Lexer {",
64 " chars: Vec<char>,",
65 " pos: usize,",
66 " begins: Vec<usize>,",
67 " tabs: Vec<usize>,",
68 " column: usize,",
69 &format!(" accepts: [usize; {}]", self.dfa.ncount),
70 "}",
71 "impl Lexer {",
72 " pub fn new(fname: &str) -> Result<Self, Box<dyn std::error::Error>> {",
73 " let chars = fs::read_to_string(fname)?",
74 " .chars()",
75 " .collect();",
76 &self.gen_accepts(),
77 " return Ok(Lexer { ",
78 " chars,",
79 " pos: 0,",
80 " begins: vec![0; 1],",
81 " tabs: Vec::new(),",
82 " column: 0,",
83 " accepts",
84 " });",
85 " }",
86 "",
87 " fn advance(&mut self) -> char {",
88 " let c = self.chars[self.pos];",
89 " match c {",
90 " '\\n' => {",
91 " self.column = 0;",
92 " self.begins.push(self.pos + 1);",
93 " },",
94 " '\\t' => {",
95 " self.tabs.push(self.column);",
96 " self.column += 4 - (self.column % 4);",
97 " }",
98 " _ => self.column += 1",
99 " }",
100 " self.pos += 1;",
101 " return c;",
102 " }",
103 " fn retract(&mut self) {",
104 " self.pos -= 1;",
105 " let c = self.chars[self.pos];",
106 " match c {",
107 " '\\n' => {",
108 " self.begins.pop();",
109 " self.column = self.pos - self.begins[self.begins.len() - 1];",
110 " }",
111 " '\\t' => {",
112 " self.column = self.tabs.pop().unwrap();",
113 " }",
114 " _ => self.column -= 1",
115 " }",
116 " }",
117 ])?;
118 self.indent();
119 self.writeln("pub fn next(&mut self) -> Result<Token, TokenErr> {")?;
120 self.indent();
121 self.write_automota()?;
122 self.unindent();
123 self.writeln("}")?;
124 self.unindent();
125 self.writeln("}")?;
126 return Ok(());
127 }
128
129 fn write_automota(&mut self) -> Result<(), Box<dyn Error>> {
130 self.write_vec(&[
131 "if self.pos == self.chars.len() { return Ok(EOF); }",
132 "let mut stk: Vec<usize> = Vec::new();",
133 "let mut chars: Vec<char> = Vec::new();",
134 "let mut state: usize = 0;",
135 "loop {",
136 ])?;
137 self.indent();
138 self.writeln("if self.pos == self.chars.len() { break; }")?;
139 self.writeln("let c = self.advance();")?;
140 self.writeln("state = match state {")?;
141 self.indent();
142 for state in 0..self.dfa.ncount {
143 if state == self.dfa.dead {
144 self.write_vec(&[
145 &format!("{} => {{", self.dfa.dead),
146 "\tstk.push(state);",
147 "\tchars.push(c);",
148 "\tbreak;",
149 "}"
150 ])?;
151 } else {
152 self.write_transitions(state)?;
153 }
154 }
155 self.writeln("_ => panic!(\"Invalid State!\")")?;
156 self.unindent();
157 self.writeln("};")?;
158 self.writeln("stk.push(state);")?;
159 self.writeln("chars.push(c);")?;
160 self.unindent();
161 self.writeln("}")?;
162 self.write_vec(&[
163 "while stk.len() > 0 &&",
164 " self.accepts[stk[stk.len() - 1]] == 0 {",
165 " stk.pop().unwrap();",
166 " chars.pop().unwrap();",
167 " self.retract();",
168 "}",
169 "if stk.len() == 0 {",
170 " let start = self.begins[self.begins.len() - 1];",
171 " let error_line: String = self.chars[start..]",
172 " .iter()",
173 " .take_while(|&&c| c != '\\n')",
174 " .collect();",
175 " return Err(TokenErr{error: format!(",
176 " \"Failed to lex from: \\n{}\\n{}^\",",
177 " error_line,",
178 " \" \".repeat(self.column)",
179 " )});",
180 "}"
181 ])?;
182 self.writeln("let word : String = chars.iter().collect();")?;
183 self.writeln("match self.accepts[stk[stk.len() - 1]] {")?;
184 self.indent();
185 for (idx, label) in self.dfa.labels.iter().enumerate() {
186 if label.len() == 0 { continue; }
187 self.writeln(&format!(
188 "{:<4} => return Ok({}(word)),",
189 idx + 1, self.dfa.labels[idx],
190 ))?;
191 }
192 self.writeln("_ => panic!(\"Invalid Accepting State\")")?;
193 self.unindent();
194 self.writeln("}")?;
195 return Ok(());
196 }
197
198 fn write_transitions(&mut self, state: usize) -> Result<(), Box<dyn Error>> {
199 self.writeln(&format!("{state} => match c {{"))?;
200 self.indent();
201 let mut j = 0;
202 while j < u8::MAX {
203 let nbr = self.dfa.jumps[state][j as usize];
204 if state == 0 && nbr == 0 {
206 self.writeln(&format!("\'{}\' => {},",
207 escape(j as char),
208 "continue"
209 ))?;
210 j += 1;
211 continue;
212 }
213 if nbr == dfa::NULL { j += 1; continue; };
214 if self.dfa.dead == nbr { j += 1; continue; }
215 let start = j;
217 while j + 1 < u8::MAX &&
218 self.dfa.jumps[state][(j + 1) as usize] == nbr {
219 j += 1
220 }
221 if start == j {
223 self.writeln(&format!("\'{}\' => {},",
224 escape(start as char),
225 self.dfa.jumps[state][j as usize]
226 ))?;
227 } else if start + 1 == j {
228 self.writeln(&format!("\'{}\' | \'{}\' => {},",
229 escape(start as char),
230 escape(j as char),
231 self.dfa.jumps[state][j as usize]
232 ))?;
233 } else {
234 self.writeln(&format!(
235 "\'{}\'..=\'{}\' => {},",
236 escape(start as char),
237 escape(j as char),
238 self.dfa.jumps[state][j as usize]
239 ))?;
240 }
241 j += 1;
242 }
243 self.writeln(&format!("_ => {}", self.dfa.dead))?;
244 self.unindent();
245 self.writeln("},")?;
246 return Ok(());
247 }
248
249 fn gen_accepts(&self) -> String {
250 let mut res = "\t\tlet accepts = [\n".to_string();
251 let mut idx = 0;
252 while (self.dfa.ncount-idx) / 5 > 0 {
253 for _ in 0..4 {
254 res.push_str(&format!("\t\t\t{:>4}, ", self.dfa.accepts[idx]));
255 idx += 1;
256 }
257 res.push_str(&format!("\t\t\t{:>4},\n", self.dfa.accepts[idx]));
258 idx += 1;
259 }
260 if self.dfa.ncount%5 > 0 {
261 for _ in 0..(self.dfa.ncount%5 - 1) {
262 res.push_str(&format!("\t\t\t{:>4}, ", self.dfa.accepts[idx]));
263 idx += 1
264 }
265 res.push_str(&format!("\t\t\t{:>4}\n", self.dfa.accepts[idx]));
266 }
267 res.push_str("\t\t];");
268 return res;
269 }
270}
271
272fn escape(c: char) -> String {
273 match c {
274 '\n' => return "\\n".to_string(),
275 '\t' => return "\\t".to_string(),
276 '\\' => "\\\\".to_string(),
277 '\'' => "\\'".to_string(),
278 '"' => "\\\"".to_string(),
279 '\r' => "\\r".to_string(),
280 _ => return c.to_string()
281 }
282}
283
284#[cfg(test)]
286mod tests {
287 use super::*;
288 use crate::{lexer::Lexer, parser::Parser, nfa::NFA};
289
290 #[allow(dead_code)]
291 fn visualize() {
292 let path = "example2.tk";
293 let lexer = Lexer::new(path).expect("Invalid Path");
294 let mut parser = Parser::new(lexer).expect("File should be non-empty!");
295 let nfa = NFA::build_from_matches(&parser.parse().expect("Invalid parse"));
296 let dfa = DFA::compress(DFA::subset_construction(nfa));
297 let mut gen = Generator::new(&dfa, "tests/tokenizer.rs".to_string())
298 .expect("Just Be Better");
299 gen.generate().expect("WORK");
300 }
301}