// glr_parser/glr_lex.rs

1#![allow(unused_imports)]
2#![allow(unused_variables)]
3
4extern crate regex;
5
6
7use std::collections::HashSet;
8
9use glr_grammar;
10use glr_grammar::Atom as Atom;
11use glr_grammar::GrammarItem as GrammarItem;
12use std::sync::Arc;
13
14use self::regex::Regex;
15
/// A single lexical token produced by `gen_lex`.
#[derive(Debug,Clone,Hash,PartialEq,Eq,PartialOrd,Ord)]
pub struct Lex {
    // Terminal symbol this token maps to in the grammar.
    pub atom: Arc<Atom>,
    // Matched source text for named lex tokens; `None` for grammar
    // literals, whose text is implied by the terminal itself.
    pub value: Option<Arc<String>>
}
21
/// Escapes regex metacharacters in `raw` so the result matches `raw`
/// literally when embedded in a larger pattern.
///
/// Prefixes every occurrence of `\ . + * ? ( ) | [ ] { } ^ $` with a
/// backslash — exactly what the previous regex-based `replace_all` did —
/// but without compiling a fresh `Regex` (and risking its `unwrap`) on
/// every call.
fn escape_re_string(raw: String) -> String {
    // Worst case doubles the length; reserving the input length is a
    // cheap lower bound that avoids most reallocations.
    let mut escaped = String::with_capacity(raw.len());
    for c in raw.chars() {
        if "\\.+*?()|[]{}^$".contains(c) {
            escaped.push('\\');
        }
        escaped.push(c);
    }
    escaped
}
26fn gen_re(lex_re_string: String, grammar_strings: Vec<String>) -> (Vec<String>, String) {
27    let mut re_string: String = String::new();
28    let mut tokens: Vec<String> = Vec::new();
29    let test_w = Regex::new(r"^\w+$").unwrap();
30
31    let mut added_grammar: HashSet<String> = HashSet::new();
32    for item in grammar_strings.iter() {
33        if added_grammar.contains(item) {continue}
34
35        if re_string.len() > 0 { re_string.push('|'); }
36        re_string.push_str(item);
37        if test_w.is_match(item) {
38            re_string.push_str("\\b");
39        }
40        // re_string.push_str(&("(?:".to_string() + item + ")"));
41
42        // tokens.push(item.clone());
43        added_grammar.insert(item.clone());
44    }
45    
46    for line in lex_re_string.split("\n") {
47        if line.trim().len() == 0 {continue}
48        let mut reg: String = String::new();
49        let mut index = 0u16;
50        for item in line.split("=") {
51            if index == 0 {
52                tokens.push(item.trim().to_string());
53            } else {
54                if index > 1 {reg.push_str("=")}
55                reg.push_str(item.trim());
56            }
57            index += 1;
58        }
59        re_string.push_str(&("|(".to_string() + &reg + ")"));
60    }
61    
62    // println!("{:?}", re_string);
63    (tokens, re_string)
64}
65
66pub fn gen_lex(program_raw: String, raw_lex_string: String, raw_grammar_string: String) -> (Vec<Arc<Lex>>, Vec<String>){
67    let mut ret: Vec<Arc<Lex>> = Vec::new();
68    let mut grammar_strings: Vec<String> = Vec::new();
69    let re = Regex::new("'[^']+'").unwrap();
70    for cap in re.captures_iter(&raw_grammar_string) {
71        for val in cap.iter() {
72            grammar_strings.push(escape_re_string(val.unwrap().to_string().replace("'", "")));
73        }
74    }
75    let (tokens, re_string) = gen_re(raw_lex_string, grammar_strings);
76    
77    match Regex::new(&re_string) {
78        Err(e) => {panic!("Lex Creating Error...")},
79        Ok(ret_re) => {
80            for cap in ret_re.captures_iter(&program_raw) {
81                let mut index = 0u16;
82                let mut val: String = String::new();
83                for name in cap.iter() {
84                    if index == 0 {index += 1; continue;}
85                    if let Some(x) = name {
86                        val = x.to_string();
87                        break;
88                    }
89                    index += 1;
90                }
91
92                if index as usize == cap.len() {
93                    ret.push(Arc::new(Lex {atom: Arc::new(Atom::Terminal(Arc::new(cap.at(0).unwrap().to_string()))), value: None}));
94                } else if let Some(token) = tokens.get(index as usize - 1) {
95                    ret.push(Arc::new(Lex {atom: Arc::new(Atom::Terminal(Arc::new(token.clone()))), value: Some(Arc::new(val))}));
96                }
97            }
98        }
99    }
100
101
102    (ret, tokens)
103}