glr_parser/
glr_grammar.rs

1#![allow(unused_imports)]
2#![allow(unused_variables)]
3#![allow(unused_mut)]
4#![allow(dead_code)]
5
6use std::rc::{Rc, Weak};
7use std::sync::Arc;
8use std::collections::HashMap;
9use std::collections::hash_map::Entry::{Occupied, Vacant};
10
11#[derive(Debug,Clone,Hash,PartialEq,Eq,PartialOrd,Ord)]
12pub enum Atom {
13    Symbol(Arc<String>),
14    Terminal(Arc<String>)
15}
16
17
18#[derive(Debug,Clone,Hash,PartialEq,Eq,PartialOrd,Ord)]
19pub struct GrammarItem {
20    pub symbol: Arc<Atom>,
21    pub production: Vec<Arc<Atom>>
22}
23
24pub fn get_atom_string(atom: Arc<Atom>) -> String {
25    match *atom {Atom::Symbol(ref x) => x.to_string(), Atom::Terminal(ref x) => x.to_string()}
26}
27pub fn grammar_gen(_grammar_str: &str, terminal_tokens: Rc<Vec<String>>) -> Vec<Box<GrammarItem>>{
28    let grammar_str = _grammar_str.to_string() + "\n";
29    let mut ret: Vec<Box<GrammarItem>> = vec![];
30
31    let mut grammar_notes: HashMap<Arc<Atom>, Vec<Vec<Arc<Atom>>>> = HashMap::new();
32    
33    #[derive(Debug,Clone)]
34    enum Mode {
35        InGroup, InString, Collect, Expect   
36    }
37    let mut word: String = String::new();
38    let mut mode: Mode = Mode::Expect;
39    let mut pl_num = 0u8;
40
41    fn plain_last_grammar(ret: &mut Vec<Box<GrammarItem>>, cache: &HashMap<Arc<Atom>, Vec<Vec<Arc<Atom>>>>) {
42        // println!("ret - {:?}", ret);
43        fn get_from_cache(orig: &Arc<Atom>, entry: &Vec<Vec<Arc<Atom>>>, cache: &HashMap<Arc<Atom>, Vec<Vec<Arc<Atom>>>>) -> Vec<Vec<Arc<Atom>>> {
44            println!("get_from_cache start {:?}", entry);
45
46            let mut ret = vec![];
47            for entry_item in entry.iter() {
48                let mut new_entry: Vec<Vec<Arc<Atom>>> = vec![vec![]];
49                
50                for atom in entry_item.iter() {
51                    if orig != atom {
52                        match cache.get(atom) {
53                            Some(x) => {
54                                let mut _new_entry: Vec<Vec<Arc<Atom>>> = vec![];
55                                for new_entry_item in new_entry.iter() {
56                                    for _x in x.iter() {
57                                        let mut _entry: Vec<Arc<Atom>> = new_entry_item.clone();
58                                        _entry.extend(_x.clone());
59                                        _new_entry.push(_entry);
60                                    }
61                                }
62                                new_entry = _new_entry;
63                            },
64                            None => {
65                                for index in 0..new_entry.len() {
66                                    new_entry.get_mut(index).unwrap().push(atom.clone());
67                                }
68                            }
69                        }   
70                    } else {
71                        for index in 0..new_entry.len() {
72                            new_entry.get_mut(index).unwrap().push(atom.clone());
73                        }
74                    }
75                }
76
77                ret.extend(new_entry);
78            }
79
80            println!("get_from_cache {:?}", ret);
81            ret
82        }
83        if ret.len() > 0 {
84                        println!("inner - {:?}", 1);
85            match ret.pop() {
86                None => {},
87                Some(mut x) => {
88                    let mut _x = x.clone();
89                    _x.production = vec![];
90                    let mut _xs = vec![_x.clone()];
91
92                    for item in x.production.iter() {
93                        println!("item - {:?}", item);
94                        match cache.get(item) {
95                            Some(entry) => {
96                                let mut new_xs = vec![];
97                                // not using all plain grammar
98                                for entry_item in get_from_cache(item, entry, cache).iter() { 
99                                // for entry_item in entry.iter() {
100                                    for _x in _xs.iter() {
101                                        let mut _new_x = _x.clone();
102                                        _new_x.production.extend(entry_item.clone());
103                                        new_xs.push(_new_x);
104                                    }
105                                }
106                                _xs = new_xs;
107                            },
108                            None => {
109                                let mut new_xs = vec![];
110                                for _x in _xs.iter() {
111                                    let mut __x = _x.clone();
112                                    __x.production.push(item.clone());
113                                    new_xs.push(__x);
114                                }
115                                _xs = new_xs;
116                            }
117                        }
118                    }
119
120                    for item in _xs.iter() {
121                        ret.push(item.clone());
122                    }
123
124                }
125            }
126        }
127    }
128    for _char in grammar_str.chars() {
129        // println!("<-{:?}", (_char, word.clone()));
130        match (mode.clone(), _char) {
131            (Mode::InGroup, '(') => {
132                word.push('(');
133                pl_num += 1;
134            },
135            (Mode::InGroup, ')') => {
136                pl_num -= 1;
137                
138                if pl_num != 0 {word.push(')')}
139                else {
140                    match ret.pop() {
141                        None => {},
142                        Some(mut x) => {
143                            let inner_symbol = "".to_string() + &get_atom_string(x.clone().symbol) + &x.production.len().to_string() + "~";
144                            let inner_gs = grammar_gen(&(inner_symbol.clone() + " = " + &word), terminal_tokens.clone());
145
146                            for item in inner_gs.iter() {
147                                match grammar_notes.entry(item.clone().symbol) {
148                                    Occupied(_entry) => {
149                                        _entry.into_mut().push(item.clone().production);
150                                    },
151                                    Vacant(_entry) => {
152                                        _entry.insert(vec![item.clone().production]);
153                                    }
154                                }
155                            }
156                            ret.extend(inner_gs);
157                            ret.push(x);
158                            word = inner_symbol;
159                            mode = Mode::Collect;
160                        }
161                    }
162                }
163
164
165            },
166
167
168
169            (Mode::InString, '\'') => {
170                mode = Mode::Collect;
171                if word.len() > 0 {
172                    match ret.pop() {
173                        None => {},
174                        Some(mut x) => {
175                            x.production.push(Arc::new(Atom::Terminal(Arc::new(word))));
176                            ret.push(x);
177                        }
178                    }
179                    word = String::new();
180                }
181            },
182
183
184            (Mode::Collect, ' ') | (Mode::Collect, '\r') | (Mode::Collect, '\n') | (Mode::Collect, '\t') => {
185                if word.len() > 0 {
186                    match ret.pop() {
187                        None => {},
188                        Some(mut x) => {
189                            let push_atom = if terminal_tokens.contains(&word) {
190                                Arc::new(Atom::Terminal(Arc::new(word)))
191                            } else {
192                                Arc::new(Atom::Symbol(Arc::new(word)))
193                            };
194                            x.production.push(push_atom);
195                            // match group_cache.get(&Arc::new(Atom::Symbol(Arc::new(word.clone())))) {
196                            //     Some(entry) => {
197                            //         for item in entry.iter(){
198                            //             x.production.extend(entry.clone());
199                            //         }
200                            //     },
201                            //     None => {
202                            //     }
203                            // }
204                            
205                            ret.push(x);
206                        }
207                    }
208                    word = String::new();
209                }
210
211                match _char {
212                    '\r' | '\n' => {
213                        mode = Mode::Expect;
214                    },
215                    _ => {}
216                }
217            },
218            (Mode::Collect, '(') => {
219                pl_num += 1;
220                mode = Mode::InGroup;
221            },
222            (Mode::Collect, '|') | (Mode::Expect, '|') => {
223                if word.len() > 0 { // same as above ' ' matching
224                    match ret.pop() {
225                        None => {},
226                        Some(mut x) => {
227                            let push_atom = if terminal_tokens.contains(&word) {
228                                Arc::new(Atom::Terminal(Arc::new(word)))
229                            } else {
230                                Arc::new(Atom::Symbol(Arc::new(word)))
231                            };
232                            x.production.push(push_atom);
233                            
234                            ret.push(x);
235                        }
236                    }
237                    word = String::new();
238                }
239
240                match ret.pop() {
241                    None => {},
242                    Some(mut x) => {
243                        ret.push(x.clone());
244                        x.production = vec![];
245                        ret.push(x);
246                    }
247                }
248                mode = Mode::Collect;
249            },
250            (Mode::Collect, '\'') => {
251                mode = Mode::InString;
252            },
253            (Mode::Collect, '+') | (Mode::Collect, '*') | (Mode::Collect, '?') => {
254                match ret.pop() {
255                    None => {},
256                    Some(mut x) => {
257                        let new_x_symbol = Arc::new(Atom::Symbol(Arc::new("~".to_string() + &x.production.len().to_string() + &get_atom_string(x.clone().symbol))));
258
259                        let word_push: Arc<Atom>;
260                        if word.len() > 0 {
261                            let push_atom = if terminal_tokens.contains(&word) {
262                                Arc::new(Atom::Terminal(Arc::new(word)))
263                            } else {
264                                Arc::new(Atom::Symbol(Arc::new(word)))
265                            };
266                           word_push = push_atom;
267                        } else {
268                            match x.production.pop() {
269                                Some(x) => {
270                                    word_push = x;
271                                },
272                                None => {
273                                    word_push = Arc::new(Atom::Terminal(Arc::new("".to_string())));
274                                }
275                            }
276                        }
277
278                        ret.push(Box::new(GrammarItem {symbol: new_x_symbol.clone(), production: vec![word_push.clone()]}));
279                        match grammar_notes.entry(new_x_symbol.clone()) {
280                            Occupied(_entry) => {
281                                _entry.into_mut().push(vec![word_push.clone()]);
282                            },
283                            Vacant(_entry) => {
284                                _entry.insert(vec![vec![word_push.clone()]]);
285                            }
286                        }
287                        if _char == '?' || _char == '*' {
288                            ret.push(Box::new(GrammarItem {symbol: new_x_symbol.clone(), production: vec![]}));
289                            match grammar_notes.entry(new_x_symbol.clone()) {
290                                Occupied(_entry) => {
291                                    _entry.into_mut().push(vec![]);
292                                },
293                                Vacant(_entry) => {
294                                    _entry.insert(vec![vec![]]);
295                                }
296                            }
297                        } 
298
299                        if _char == '+' || _char == '*' {
300                            ret.push(Box::new(GrammarItem {symbol: new_x_symbol.clone(), production: vec![new_x_symbol.clone(), word_push.clone()]}));
301                            match grammar_notes.entry(new_x_symbol.clone()) {
302                                Occupied(_entry) => {
303                                    _entry.into_mut().push(vec![new_x_symbol.clone(), word_push]);
304                                },
305                                Vacant(_entry) => {
306                                    _entry.insert(vec![vec![new_x_symbol.clone(), word_push]]);
307                                }
308                            }
309                        }
310                   
311
312                        x.production.push(new_x_symbol.clone());
313                        ret.push(x);
314                        word = String::new();
315                        // let mut new_x = x.clone();
316                        // let new_symbol_atom = Arc::new(Atom::Symbol(Arc::new("~".to_string() + &new_x.clone().production.len().to_string() + &get_atom_string(new_x.clone().symbol))));
317                        // new_x.symbol = new_symbol_atom.clone();
318
319                        // let word_push: Arc<Atom>;
320                        // if word.len() > 0 {
321                        //    word_push = Arc::new(Atom::Symbol(Arc::new(word)));
322                        // } else {
323                        //     match x.production.pop() {
324                        //         Some(x) => {
325                        //             new_x.production.pop();
326                        //             word_push = x;
327                        //         },
328                        //         None => {
329                        //             word_push = Arc::new(Atom::Terminal(Arc::new("".to_string())));
330                        //         }
331                        //     }
332                        // }
333
334                        // if _char != '+' {ret.push(new_x.clone())}
335                        // new_x.production.push(word_push.clone());
336                        // ret.push(new_x);
337
338                        // if _char != '?' {
339                        //     ret.push(Box::new(GrammarItem {symbol: new_symbol_atom.clone(), production: vec![new_symbol_atom.clone(), word_push]}));
340                        // }
341
342                        // x.production = vec![new_symbol_atom];
343                        // ret.push(x);
344
345                        // word = String::new();
346                    }
347                }
348        
349            },
350
351
352            (Mode::Expect, '=') => {
353                // println!("->{:?}", word);
354                // plain_last_grammar(&mut ret, &grammar_notes);
355                ret.push(Box::new(GrammarItem {symbol: Arc::new(Atom::Symbol(Arc::new(word))), production: vec![]}));
356                word = String::new();
357                mode = Mode::Collect;
358            },
359            (Mode::Expect, ' ') | (Mode::Expect, '\r') | (Mode::Expect, '\n') | (Mode::Expect, '\t') => {
360            },
361
362            (m, x) => {
363                // println!("{:?}", m);
364                word.push(x);
365            }
366        }
367    }
368
369    // plain_last_grammar(&mut ret, &grammar_notes);
370
371    ret
372}
373
374fn sequitur(input: Vec<Arc<Atom>>) -> Vec<GrammarItem>{
375    #[derive(Debug,Clone,Hash,PartialEq,Eq,PartialOrd,Ord)]
376    struct AtomPair(Arc<Atom>, Arc<Atom>);
377
378    fn gen_grammars(prod: Vec<Arc<Atom>>, counter: &mut HashMap<AtomPair, u16>, n_count: &mut Box<u16>) -> (Vec<Arc<Atom>>, Vec<GrammarItem>) {
379        let mut new_prod: Vec<Arc<Atom>> = vec![];
380        let mut _ret: Vec<GrammarItem> = vec![];
381        let mut counter = counter;
382
383        if prod.len() < 2 {
384            return (prod, _ret)
385        }
386
387        for index in 0..prod.len() - 1 {
388            if let (Some(p1), Some(p2)) = (prod.get(index), prod.get(index + 1)) {
389                match counter.entry(AtomPair(p1.clone(), p2.clone())) {
390                    Occupied(entry) => {
391                        if p1 == p2 {continue}
392
393                        let mut count = entry.into_mut();
394                        **n_count += 1;
395                        *count = **n_count;
396                        println!("{:?}", (**n_count, p1.clone(), p2.clone()));
397                        _ret.push(GrammarItem {
398                            symbol: Arc::new(Atom::Symbol(Arc::new("N".to_string() + &n_count.to_string()))),
399                            production: vec![p1.clone(), p2.clone()]
400                        });
401                    },
402                    Vacant(entry) => {
403                        entry.insert(1);
404                    }
405                }
406            }
407        }
408
409        // for item in counter.iter() {
410        //     let symbol_str = match *item.symbol {Atom::Symbol(ref x) => x, Atom::Terminal(ref x) => x};
411        //     for index in 0..item.production.len() - 1 {
412        //         if let (Some(p1), Some(p2)) = (item.production.get(index), item.production.get(index + 1)) {
413        //             match counter.entry(AtomPair(p1.clone(), p2.clone())) {
414        //                 Occupied(entry) => {
415        //                     println!("o item {:?}", item);
416        //                     let mut count = entry.into_mut();
417        //                     if let Ok(x) = symbol_str.parse::<u16>() {
418        //                         *count = x;
419        //                     }
420        //                 },
421        //                 Vacant(entry) => {
422        //                     println!("v item {:?}", item);
423        //                     if let Ok(x) = symbol_str.parse::<u16>() {
424        //                         entry.insert(x);
425        //                     }
426        //                 }
427        //             }
428        //         }
429        //     }
430        // }
431
432        let mut _pass = false;
433        let mut counter_remover: Vec<AtomPair> = vec![];
434        for index in 0..prod.len() - 1 {
435            if _pass {_pass = false; continue}
436
437            if let (Some(p1), Some(p2)) = (prod.get(index), prod.get(index + 1)) {
438                match counter.entry(AtomPair(p1.clone(), p2.clone())) {
439                    Occupied(entry) => {
440                        let count = entry.get();
441                        if *count <= 1 {
442                            new_prod.push(p1.clone());
443                            if index == prod.len() - 2 {
444                                new_prod.push(p2.clone());
445                            }
446                            counter_remover.push(AtomPair(p1.clone(), p2.clone()));
447                        } else {
448                            new_prod.push(Arc::new(Atom::Symbol(Arc::new("N".to_string() + &*count.to_string()))));
449                            _pass = true;
450                        }
451
452                        
453                    },
454                    Vacant(entry) => {}
455                }
456            }
457        }
458        for item in counter_remover.iter() {
459            counter.remove(&item);
460        }
461
462        // println!("counter {:?}", counter);
463
464        if new_prod == prod {
465            (new_prod, _ret)
466        } else {
467            let (_new_prod, _new_ret) = gen_grammars(new_prod, &mut counter, n_count);
468            _ret.extend(_new_ret);
469            (_new_prod, _ret)
470            // (new_prod, _ret)
471        }
472    }
473
474    let mut n_count = Box::new(1u16);
475    let mut ret: Vec<GrammarItem> = vec![];
476    let mut counter: HashMap<AtomPair, u16> = HashMap::new();
477
478    for input_item in input.iter() {
479        // let input_item = Arc::new(Atom::Terminal(Arc::new(_char.clone().to_string())));
480
481        let mut prod: Vec<Arc<Atom>> = vec![];
482        if ret.len() == 0 {
483            prod = vec![input_item.clone()];
484            ret.push(GrammarItem {symbol: Arc::new(Atom::Symbol(Arc::new("N1".to_string()))), 
485                production: vec![input_item.clone()]});
486        } else if let Some(n1) = ret.get_mut(0) {
487            n1.production.push(input_item.clone());
488            prod = n1.production.clone();
489        }
490
491        let (new_prod, new_grammars) = gen_grammars(prod, &mut counter, &mut n_count);
492        if let Some(n1) = ret.get_mut(0) {
493            n1.production = new_prod;
494        }
495        ret.extend(new_grammars);
496    }
497
498    ret.dedup();
499    ret
500}
501fn main(){
502    let test_str = "
503        Goal = List 
504        List = Pair+
505        Pair = \'(\' Pair? \')\' 
506    ";
507
508    let test_str2 = "
509        A = a c? d* (e (f \'g\')* h)+ i
510    ";
511
512    let test_str3 = "
513        Goal = Stmt
514        Stmt = \'if\' \'expr\' \'then\' Stmt (\'else\' Stmt)?
515        Stmt = \'S\'
516    ";
517
518    let test_str4 = "
519G = file_input
520
521file_input =  simple_stmt 
522
523simple_stmt = expr_stmt ( ';' expr_stmt )* ';'? 'NEWLINE'
524
525expr_stmt = atom ('augassign'('yield_expr'|'testlist')|('='('yield_expr'|atom))*)
526
527atom= 'NAME' | 'SHORT_STRING'
528
529    ";
530    // TODO: () * + ?
531    let result = grammar_gen(test_str4, Rc::new(vec![]));
532    println!("{:?}", result);
533    for item in result.iter() {
534        println!("{:?}", (item.clone().symbol, item.clone().production));
535    }
536    // let mut sequitur_input: Vec<Arc<Atom>> = vec![];
537    // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("if".to_string()))));
538    // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("expr".to_string()))));
539    // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("then".to_string()))));
540    // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("if".to_string()))));
541    // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("expr".to_string()))));
542    // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("then".to_string()))));
543    // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("S".to_string()))));
544    // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("else".to_string()))));
545    // sequitur_input.push(Arc::new(Atom::Terminal(Arc::new("S".to_string()))));
546
547 
548    // let sequitur_result = sequitur(sequitur_input);
549    // println!("{:?}", sequitur_result);
550    // for item in result {
551    //     let symbol = match item.symbol {Atom::Symbol(x) => x, Atom::Terminal(x) => "".to_string()};
552    //     println!("{:?}", symbol);
553    //     for p in item.production {
554    //         let p_symbol = match p {Atom::Symbol(x) => ("S", x), Atom::Terminal(x) => ("T", x)};
555    //         println!("    {:?}", p_symbol);
556    //     }
557    // }
558
559    // A~ = b c
560    // A
561    // ~A = a A~ | ~A A~
562    // A = ~A e
563}