1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#![allow(unused_imports)]
#![allow(unused_variables)]
extern crate regex;
use std::collections::HashSet;
use glr_grammar;
use glr_grammar::Atom as Atom;
use glr_grammar::GrammarItem as GrammarItem;
use std::sync::Arc;
use self::regex::Regex;
/// A single lexed token: the grammar atom it maps to plus, for tokens matched
/// by a named lexer rule, the matched source text.
#[derive(Debug,Clone,Hash,PartialEq,Eq,PartialOrd,Ord)]
pub struct Lex {
// The terminal symbol this token represents in the grammar.
pub atom: Arc<Atom>,
// Captured source text for rule-based tokens; None when the token is a
// fixed literal taken verbatim from the grammar.
pub value: Option<Arc<String>>
}
/// Escapes every regex metacharacter in `raw` so the result matches the
/// original text literally when embedded in a regular expression.
///
/// Implemented as a single stdlib pass instead of compiling a `Regex` on
/// every call — this function is invoked once per grammar literal in
/// `gen_lex`, so the per-call compile was pure overhead.
fn escape_re_string(raw: String) -> String {
// Characters with special meaning in a regular expression.
const META: &[char] = &['\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^', '$'];
let mut escaped = String::with_capacity(raw.len());
for c in raw.chars() {
if META.contains(&c) {
escaped.push('\\');
}
escaped.push(c);
}
escaped
}
/// Builds the master lexer regex and the list of lexer-rule names.
///
/// `grammar_strings` are already-escaped literal terminals; they go into the
/// alternation first (word-like literals get a trailing `\b` so e.g. `if`
/// does not match the prefix of `iffy`), with duplicates dropped. Each
/// non-empty line of `lex_re_string` has the form `NAME = regex`; NAME is
/// collected into the returned token list and the regex is appended as its
/// own capture group, so group i+1 of the final regex corresponds to
/// tokens[i].
///
/// Fixes a corrupted line in the original (`®` — mojibake for `&reg`) that
/// made the function fail to compile.
fn gen_re(lex_re_string: String, grammar_strings: Vec<String>) -> (Vec<String>, String) {
// True when `s` consists only of word characters (letters, digits, `_`),
// i.e. it needs a word-boundary anchor to avoid prefix matches. Replaces
// the original per-call `^\w+$` regex; equivalent for the ASCII grammar
// literals this is fed.
fn is_word(s: &str) -> bool {
!s.is_empty() && s.chars().all(|c| c.is_alphanumeric() || c == '_')
}
let mut re_string: String = String::new();
let mut tokens: Vec<String> = Vec::new();
let mut added_grammar: HashSet<String> = HashSet::new();
for item in grammar_strings.iter() {
// `insert` returns false for duplicates, so each literal is OR-ed in once.
if !added_grammar.insert(item.clone()) { continue }
if !re_string.is_empty() { re_string.push('|'); }
re_string.push_str(item);
if is_word(item) {
re_string.push_str("\\b");
}
}
for line in lex_re_string.split('\n') {
if line.trim().is_empty() { continue }
// First `=`-separated field is the token name; the rest is the rule's
// regex. Each fragment is trimmed and interior `=` restored, matching
// the original trim-and-rejoin behaviour.
let mut fields = line.split('=');
tokens.push(fields.next().unwrap().trim().to_string());
let reg: Vec<&str> = fields.map(|f| f.trim()).collect();
re_string.push_str(&("|(".to_string() + &reg.join("=") + ")"));
}
(tokens, re_string)
}
/// Lexes `program_raw` into a stream of tokens.
///
/// * `raw_lex_string` - newline-separated `NAME = regex` lexer rules.
/// * `raw_grammar_string` - grammar text; every `'...'`-quoted literal in it
///   becomes a terminal matched verbatim (metacharacters escaped first).
///
/// Returns the lexed tokens plus the list of rule names parsed from
/// `raw_lex_string`, in declaration order.
///
/// Panics with "Lex Creating Error..." if the combined regex fails to compile.
pub fn gen_lex(program_raw: String, raw_lex_string: String, raw_grammar_string: String) -> (Vec<Arc<Lex>>, Vec<String>){
let mut ret: Vec<Arc<Lex>> = Vec::new();
let mut grammar_strings: Vec<String> = Vec::new();
// Pull every single-quoted literal out of the grammar; quotes are stripped
// and regex metacharacters escaped so the literal matches itself.
let re = Regex::new("'[^']+'").unwrap();
for cap in re.captures_iter(&raw_grammar_string) {
for val in cap.iter() {
grammar_strings.push(escape_re_string(val.unwrap().to_string().replace("'", "")));
}
}
// Build one big alternation: grammar literals first (no capture groups),
// then one capture group per lexer rule — group order matches `tokens`.
let (tokens, re_string) = gen_re(raw_lex_string, grammar_strings);
match Regex::new(&re_string) {
Err(e) => {panic!("Lex Creating Error...")},
Ok(ret_re) => {
for cap in ret_re.captures_iter(&program_raw) {
// Find the first capture group that participated in this match;
// its 1-based position identifies which lexer rule fired.
let mut index = 0u16;
let mut val: String = String::new();
for name in cap.iter() {
// Group 0 is the whole match, not a rule group — skip it.
if index == 0 {index += 1; continue;}
if let Some(x) = name {
val = x.to_string();
break;
}
index += 1;
}
if index as usize == cap.len() {
// No rule group matched: the match came from an (ungrouped)
// grammar literal, so the matched text itself is the terminal
// and no separate value is stored.
ret.push(Arc::new(Lex {atom: Arc::new(Atom::Terminal(Arc::new(cap.at(0).unwrap().to_string()))), value: None}));
} else if let Some(token) = tokens.get(index as usize - 1) {
// Rule group `index` fired: the token name comes from `tokens`
// and the captured text is kept as the token's value.
ret.push(Arc::new(Lex {atom: Arc::new(Atom::Terminal(Arc::new(token.clone()))), value: Some(Arc::new(val))}));
}
}
}
}
(ret, tokens)
}