glr_parser/glr_grammar.rs
1#![allow(unused_imports)]
2#![allow(unused_variables)]
3#![allow(unused_mut)]
4#![allow(dead_code)]
5
6use std::rc::{Rc, Weak};
7use std::sync::Arc;
8use std::collections::HashMap;
9use std::collections::hash_map::Entry::{Occupied, Vacant};
10
11#[derive(Debug,Clone,Hash,PartialEq,Eq,PartialOrd,Ord)]
12pub enum Atom {
13 Symbol(Arc<String>),
14 Terminal(Arc<String>)
15}
16
17
18#[derive(Debug,Clone,Hash,PartialEq,Eq,PartialOrd,Ord)]
19pub struct GrammarItem {
20 pub symbol: Arc<Atom>,
21 pub production: Vec<Arc<Atom>>
22}
23
24pub fn get_atom_string(atom: Arc<Atom>) -> String {
25 match *atom {Atom::Symbol(ref x) => x.to_string(), Atom::Terminal(ref x) => x.to_string()}
26}
27pub fn grammar_gen(_grammar_str: &str, terminal_tokens: Rc<Vec<String>>) -> Vec<Box<GrammarItem>>{
28 let grammar_str = _grammar_str.to_string() + "\n";
29 let mut ret: Vec<Box<GrammarItem>> = vec![];
30
31 let mut grammar_notes: HashMap<Arc<Atom>, Vec<Vec<Arc<Atom>>>> = HashMap::new();
32
33 #[derive(Debug,Clone)]
34 enum Mode {
35 InGroup, InString, Collect, Expect
36 }
37 let mut word: String = String::new();
38 let mut mode: Mode = Mode::Expect;
39 let mut pl_num = 0u8;
40
41 fn plain_last_grammar(ret: &mut Vec<Box<GrammarItem>>, cache: &HashMap<Arc<Atom>, Vec<Vec<Arc<Atom>>>>) {
42 // println!("ret - {:?}", ret);
43 fn get_from_cache(orig: &Arc<Atom>, entry: &Vec<Vec<Arc<Atom>>>, cache: &HashMap<Arc<Atom>, Vec<Vec<Arc<Atom>>>>) -> Vec<Vec<Arc<Atom>>> {
44 println!("get_from_cache start {:?}", entry);
45
46 let mut ret = vec![];
47 for entry_item in entry.iter() {
48 let mut new_entry: Vec<Vec<Arc<Atom>>> = vec![vec![]];
49
50 for atom in entry_item.iter() {
51 if orig != atom {
52 match cache.get(atom) {
53 Some(x) => {
54 let mut _new_entry: Vec<Vec<Arc<Atom>>> = vec![];
55 for new_entry_item in new_entry.iter() {
56 for _x in x.iter() {
57 let mut _entry: Vec<Arc<Atom>> = new_entry_item.clone();
58 _entry.extend(_x.clone());
59 _new_entry.push(_entry);
60 }
61 }
62 new_entry = _new_entry;
63 },
64 None => {
65 for index in 0..new_entry.len() {
66 new_entry.get_mut(index).unwrap().push(atom.clone());
67 }
68 }
69 }
70 } else {
71 for index in 0..new_entry.len() {
72 new_entry.get_mut(index).unwrap().push(atom.clone());
73 }
74 }
75 }
76
77 ret.extend(new_entry);
78 }
79
80 println!("get_from_cache {:?}", ret);
81 ret
82 }
83 if ret.len() > 0 {
84 println!("inner - {:?}", 1);
85 match ret.pop() {
86 None => {},
87 Some(mut x) => {
88 let mut _x = x.clone();
89 _x.production = vec![];
90 let mut _xs = vec![_x.clone()];
91
92 for item in x.production.iter() {
93 println!("item - {:?}", item);
94 match cache.get(item) {
95 Some(entry) => {
96 let mut new_xs = vec![];
97 // not using all plain grammar
98 for entry_item in get_from_cache(item, entry, cache).iter() {
99 // for entry_item in entry.iter() {
100 for _x in _xs.iter() {
101 let mut _new_x = _x.clone();
102 _new_x.production.extend(entry_item.clone());
103 new_xs.push(_new_x);
104 }
105 }
106 _xs = new_xs;
107 },
108 None => {
109 let mut new_xs = vec![];
110 for _x in _xs.iter() {
111 let mut __x = _x.clone();
112 __x.production.push(item.clone());
113 new_xs.push(__x);
114 }
115 _xs = new_xs;
116 }
117 }
118 }
119
120 for item in _xs.iter() {
121 ret.push(item.clone());
122 }
123
124 }
125 }
126 }
127 }
128 for _char in grammar_str.chars() {
129 // println!("<-{:?}", (_char, word.clone()));
130 match (mode.clone(), _char) {
131 (Mode::InGroup, '(') => {
132 word.push('(');
133 pl_num += 1;
134 },
135 (Mode::InGroup, ')') => {
136 pl_num -= 1;
137
138 if pl_num != 0 {word.push(')')}
139 else {
140 match ret.pop() {
141 None => {},
142 Some(mut x) => {
143 let inner_symbol = "".to_string() + &get_atom_string(x.clone().symbol) + &x.production.len().to_string() + "~";
144 let inner_gs = grammar_gen(&(inner_symbol.clone() + " = " + &word), terminal_tokens.clone());
145
146 for item in inner_gs.iter() {
147 match grammar_notes.entry(item.clone().symbol) {
148 Occupied(_entry) => {
149 _entry.into_mut().push(item.clone().production);
150 },
151 Vacant(_entry) => {
152 _entry.insert(vec![item.clone().production]);
153 }
154 }
155 }
156 ret.extend(inner_gs);
157 ret.push(x);
158 word = inner_symbol;
159 mode = Mode::Collect;
160 }
161 }
162 }
163
164
165 },
166
167
168
169 (Mode::InString, '\'') => {
170 mode = Mode::Collect;
171 if word.len() > 0 {
172 match ret.pop() {
173 None => {},
174 Some(mut x) => {
175 x.production.push(Arc::new(Atom::Terminal(Arc::new(word))));
176 ret.push(x);
177 }
178 }
179 word = String::new();
180 }
181 },
182
183
184 (Mode::Collect, ' ') | (Mode::Collect, '\r') | (Mode::Collect, '\n') | (Mode::Collect, '\t') => {
185 if word.len() > 0 {
186 match ret.pop() {
187 None => {},
188 Some(mut x) => {
189 let push_atom = if terminal_tokens.contains(&word) {
190 Arc::new(Atom::Terminal(Arc::new(word)))
191 } else {
192 Arc::new(Atom::Symbol(Arc::new(word)))
193 };
194 x.production.push(push_atom);
195 // match group_cache.get(&Arc::new(Atom::Symbol(Arc::new(word.clone())))) {
196 // Some(entry) => {
197 // for item in entry.iter(){
198 // x.production.extend(entry.clone());
199 // }
200 // },
201 // None => {
202 // }
203 // }
204
205 ret.push(x);
206 }
207 }
208 word = String::new();
209 }
210
211 match _char {
212 '\r' | '\n' => {
213 mode = Mode::Expect;
214 },
215 _ => {}
216 }
217 },
218 (Mode::Collect, '(') => {
219 pl_num += 1;
220 mode = Mode::InGroup;
221 },
222 (Mode::Collect, '|') | (Mode::Expect, '|') => {
223 if word.len() > 0 { // same as above ' ' matching
224 match ret.pop() {
225 None => {},
226 Some(mut x) => {
227 let push_atom = if terminal_tokens.contains(&word) {
228 Arc::new(Atom::Terminal(Arc::new(word)))
229 } else {
230 Arc::new(Atom::Symbol(Arc::new(word)))
231 };
232 x.production.push(push_atom);
233
234 ret.push(x);
235 }
236 }
237 word = String::new();
238 }
239
240 match ret.pop() {
241 None => {},
242 Some(mut x) => {
243 ret.push(x.clone());
244 x.production = vec![];
245 ret.push(x);
246 }
247 }
248 mode = Mode::Collect;
249 },
250 (Mode::Collect, '\'') => {
251 mode = Mode::InString;
252 },
253 (Mode::Collect, '+') | (Mode::Collect, '*') | (Mode::Collect, '?') => {
254 match ret.pop() {
255 None => {},
256 Some(mut x) => {
257 let new_x_symbol = Arc::new(Atom::Symbol(Arc::new("~".to_string() + &x.production.len().to_string() + &get_atom_string(x.clone().symbol))));
258
259 let word_push: Arc<Atom>;
260 if word.len() > 0 {
261 let push_atom = if terminal_tokens.contains(&word) {
262 Arc::new(Atom::Terminal(Arc::new(word)))
263 } else {
264 Arc::new(Atom::Symbol(Arc::new(word)))
265 };
266 word_push = push_atom;
267 } else {
268 match x.production.pop() {
269 Some(x) => {
270 word_push = x;
271 },
272 None => {
273 word_push = Arc::new(Atom::Terminal(Arc::new("".to_string())));
274 }
275 }
276 }
277
278 ret.push(Box::new(GrammarItem {symbol: new_x_symbol.clone(), production: vec![word_push.clone()]}));
279 match grammar_notes.entry(new_x_symbol.clone()) {
280 Occupied(_entry) => {
281 _entry.into_mut().push(vec![word_push.clone()]);
282 },
283 Vacant(_entry) => {
284 _entry.insert(vec![vec![word_push.clone()]]);
285 }
286 }
287 if _char == '?' || _char == '*' {
288 ret.push(Box::new(GrammarItem {symbol: new_x_symbol.clone(), production: vec![]}));
289 match grammar_notes.entry(new_x_symbol.clone()) {
290 Occupied(_entry) => {
291 _entry.into_mut().push(vec![]);
292 },
293 Vacant(_entry) => {
294 _entry.insert(vec![vec![]]);
295 }
296 }
297 }
298
299 if _char == '+' || _char == '*' {
300 ret.push(Box::new(GrammarItem {symbol: new_x_symbol.clone(), production: vec![new_x_symbol.clone(), word_push.clone()]}));
301 match grammar_notes.entry(new_x_symbol.clone()) {
302 Occupied(_entry) => {
303 _entry.into_mut().push(vec![new_x_symbol.clone(), word_push]);
304 },
305 Vacant(_entry) => {
306 _entry.insert(vec![vec![new_x_symbol.clone(), word_push]]);
307 }
308 }
309 }
310
311
312 x.production.push(new_x_symbol.clone());
313 ret.push(x);
314 word = String::new();
315 // let mut new_x = x.clone();
316 // let new_symbol_atom = Arc::new(Atom::Symbol(Arc::new("~".to_string() + &new_x.clone().production.len().to_string() + &get_atom_string(new_x.clone().symbol))));
317 // new_x.symbol = new_symbol_atom.clone();
318
319 // let word_push: Arc<Atom>;
320 // if word.len() > 0 {
321 // word_push = Arc::new(Atom::Symbol(Arc::new(word)));
322 // } else {
323 // match x.production.pop() {
324 // Some(x) => {
325 // new_x.production.pop();
326 // word_push = x;
327 // },
328 // None => {
329 // word_push = Arc::new(Atom::Terminal(Arc::new("".to_string())));
330 // }
331 // }
332 // }
333
334 // if _char != '+' {ret.push(new_x.clone())}
335 // new_x.production.push(word_push.clone());
336 // ret.push(new_x);
337
338 // if _char != '?' {
339 // ret.push(Box::new(GrammarItem {symbol: new_symbol_atom.clone(), production: vec![new_symbol_atom.clone(), word_push]}));
340 // }
341
342 // x.production = vec![new_symbol_atom];
343 // ret.push(x);
344
345 // word = String::new();
346 }
347 }
348
349 },
350
351
352 (Mode::Expect, '=') => {
353 // println!("->{:?}", word);
354 // plain_last_grammar(&mut ret, &grammar_notes);
355 ret.push(Box::new(GrammarItem {symbol: Arc::new(Atom::Symbol(Arc::new(word))), production: vec![]}));
356 word = String::new();
357 mode = Mode::Collect;
358 },
359 (Mode::Expect, ' ') | (Mode::Expect, '\r') | (Mode::Expect, '\n') | (Mode::Expect, '\t') => {
360 },
361
362 (m, x) => {
363 // println!("{:?}", m);
364 word.push(x);
365 }
366 }
367 }
368
369 // plain_last_grammar(&mut ret, &grammar_notes);
370
371 ret
372}
373
374fn sequitur(input: Vec<Arc<Atom>>) -> Vec<GrammarItem>{
375 #[derive(Debug,Clone,Hash,PartialEq,Eq,PartialOrd,Ord)]
376 struct AtomPair(Arc<Atom>, Arc<Atom>);
377
378 fn gen_grammars(prod: Vec<Arc<Atom>>, counter: &mut HashMap<AtomPair, u16>, n_count: &mut Box<u16>) -> (Vec<Arc<Atom>>, Vec<GrammarItem>) {
379 let mut new_prod: Vec<Arc<Atom>> = vec![];
380 let mut _ret: Vec<GrammarItem> = vec![];
381 let mut counter = counter;
382
383 if prod.len() < 2 {
384 return (prod, _ret)
385 }
386
387 for index in 0..prod.len() - 1 {
388 if let (Some(p1), Some(p2)) = (prod.get(index), prod.get(index + 1)) {
389 match counter.entry(AtomPair(p1.clone(), p2.clone())) {
390 Occupied(entry) => {
391 if p1 == p2 {continue}
392
393 let mut count = entry.into_mut();
394 **n_count += 1;
395 *count = **n_count;
396 println!("{:?}", (**n_count, p1.clone(), p2.clone()));
397 _ret.push(GrammarItem {
398 symbol: Arc::new(Atom::Symbol(Arc::new("N".to_string() + &n_count.to_string()))),
399 production: vec![p1.clone(), p2.clone()]
400 });
401 },
402 Vacant(entry) => {
403 entry.insert(1);
404 }
405 }
406 }
407 }
408
409 // for item in counter.iter() {
410 // let symbol_str = match *item.symbol {Atom::Symbol(ref x) => x, Atom::Terminal(ref x) => x};
411 // for index in 0..item.production.len() - 1 {
412 // if let (Some(p1), Some(p2)) = (item.production.get(index), item.production.get(index + 1)) {
413 // match counter.entry(AtomPair(p1.clone(), p2.clone())) {
414 // Occupied(entry) => {
415 // println!("o item {:?}", item);
416 // let mut count = entry.into_mut();
417 // if let Ok(x) = symbol_str.parse::<u16>() {
418 // *count = x;
419 // }
420 // },
421 // Vacant(entry) => {
422 // println!("v item {:?}", item);
423 // if let Ok(x) = symbol_str.parse::<u16>() {
424 // entry.insert(x);
425 // }
426 // }
427 // }
428 // }
429 // }
430 // }
431
432 let mut _pass = false;
433 let mut counter_remover: Vec<AtomPair> = vec![];
434 for index in 0..prod.len() - 1 {
435 if _pass {_pass = false; continue}
436
437 if let (Some(p1), Some(p2)) = (prod.get(index), prod.get(index + 1)) {
438 match counter.entry(AtomPair(p1.clone(), p2.clone())) {
439 Occupied(entry) => {
440 let count = entry.get();
441 if *count <= 1 {
442 new_prod.push(p1.clone());
443 if index == prod.len() - 2 {
444 new_prod.push(p2.clone());
445 }
446 counter_remover.push(AtomPair(p1.clone(), p2.clone()));
447 } else {
448 new_prod.push(Arc::new(Atom::Symbol(Arc::new("N".to_string() + &*count.to_string()))));
449 _pass = true;
450 }
451
452
453 },
454 Vacant(entry) => {}
455 }
456 }
457 }
458 for item in counter_remover.iter() {
459 counter.remove(&item);
460 }
461
462 // println!("counter {:?}", counter);
463
464 if new_prod == prod {
465 (new_prod, _ret)
466 } else {
467 let (_new_prod, _new_ret) = gen_grammars(new_prod, &mut counter, n_count);
468 _ret.extend(_new_ret);
469 (_new_prod, _ret)
470 // (new_prod, _ret)
471 }
472 }
473
474 let mut n_count = Box::new(1u16);
475 let mut ret: Vec<GrammarItem> = vec![];
476 let mut counter: HashMap<AtomPair, u16> = HashMap::new();
477
478 for input_item in input.iter() {
479 // let input_item = Arc::new(Atom::Terminal(Arc::new(_char.clone().to_string())));
480
481 let mut prod: Vec<Arc<Atom>> = vec![];
482 if ret.len() == 0 {
483 prod = vec![input_item.clone()];
484 ret.push(GrammarItem {symbol: Arc::new(Atom::Symbol(Arc::new("N1".to_string()))),
485 production: vec![input_item.clone()]});
486 } else if let Some(n1) = ret.get_mut(0) {
487 n1.production.push(input_item.clone());
488 prod = n1.production.clone();
489 }
490
491 let (new_prod, new_grammars) = gen_grammars(prod, &mut counter, &mut n_count);
492 if let Some(n1) = ret.get_mut(0) {
493 n1.production = new_prod;
494 }
495 ret.extend(new_grammars);
496 }
497
498 ret.dedup();
499 ret
500}
501fn main(){
502 let test_str = "
503 Goal = List
504 List = Pair+
505 Pair = \'(\' Pair? \')\'
506 ";
507
508 let test_str2 = "
509 A = a c? d* (e (f \'g\')* h)+ i
510 ";
511
512 let test_str3 = "
513 Goal = Stmt
514 Stmt = \'if\' \'expr\' \'then\' Stmt (\'else\' Stmt)?
515 Stmt = \'S\'
516 ";
517
518 let test_str4 = "
519G = file_input
520
521file_input = simple_stmt
522
523simple_stmt = expr_stmt ( ';' expr_stmt )* ';'? 'NEWLINE'
524
525expr_stmt = atom ('augassign'('yield_expr'|'testlist')|('='('yield_expr'|atom))*)
526
527atom= 'NAME' | 'SHORT_STRING'
528
529 ";
530 // TODO: () * + ?
531 let result = grammar_gen(test_str4, Rc::new(vec![]));
532 println!("{:?}", result);
533 for item in result.iter() {
534 println!("{:?}", (item.clone().symbol, item.clone().production));
535 }
536 // let mut sequitur_input: Vec<Arc<Atom>> = vec![];
537 // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("if".to_string()))));
538 // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("expr".to_string()))));
539 // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("then".to_string()))));
540 // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("if".to_string()))));
541 // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("expr".to_string()))));
542 // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("then".to_string()))));
543 // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("S".to_string()))));
544 // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("else".to_string()))));
545 // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("S".to_string()))));
546
547
548 // let sequitur_result = sequitur(sequitur_input);
549 // println!("{:?}", sequitur_result);
550 // for item in result {
551 // let symbol = match item.symbol {Atom::Symbol(x) => x, Atom::Terminal(x) => "".to_string()};
552 // println!("{:?}", symbol);
553 // for p in item.production {
554 // let p_symbol = match p {Atom::Symbol(x) => ("S", x), Atom::Terminal(x) => ("T", x)};
555 // println!(" {:?}", p_symbol);
556 // }
557 // }
558
559 // A~ = b c
560 // A
561 // ~A = a A~ | ~A A~
562 // A = ~A e
563}