use serde::{Deserialize, Serialize};

use std::collections::HashMap;
use regex::Regex;

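/// A lexer rule set as it appears on disk: token-type names mapped to regex
/// source strings, plus a pattern describing skippable whitespace.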
#[derive(Serialize, Deserialize)]
struct RuleSet {
    literals: HashMap<String, String>,
    whitespace: String,
}

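/// A `RuleSet` with every pattern compiled into a `Regex`, ready for matching.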
#[derive(Clone)]
struct RegexRuleSet {
    literals: HashMap<String, Regex>,
    whitespace: Regex,
}

#[allow(dead_code)]
impl RegexRuleSet {
    /// Compiles every pattern in the rule set. Panics if any pattern is not a
    /// valid regex.
    fn from(ruleset: RuleSet) -> Self {
        Self {
            literals: ruleset
                .literals
                .into_iter()
                .map(|(k, v)| (k, Regex::new(&v).unwrap()))
                .collect(),
            whitespace: Regex::new(&ruleset.whitespace).unwrap(),
        }
    }

    /// Parses a JSON rule file and compiles it. Panics on malformed JSON or
    /// invalid patterns.
    fn from_string(json: String) -> Self {
        Self::from(serde_json::from_str::<RuleSet>(&json).unwrap())
    }
}

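/// A single lexeme tagged with its rule name and the line it was read on.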
#[derive(Clone)]
pub struct Token {
    pub token_type: String,
    pub value: String,
    pub line: usize,
}

#[allow(dead_code)]
impl Token {
    /// Returns true if this token's type matches any of the given type names.
    pub fn is<T: ToString>(&self, types: Vec<T>) -> bool {
        types.into_iter().any(|t| t.to_string() == self.token_type)
    }
}

impl std::fmt::Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}({})", self.token_type, self.value)
    }
}

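/// A regex-driven lexer that consumes `source` from the front, producing
/// tokens on demand. `cache` holds a peeked token, `last_token` the most
/// recently returned one, and `line` the current (zero-based) line number.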
#[derive(Clone)]
pub struct Lexer {
    source: String,
    last_token: Option<Result<Token, ParsingError>>,
    cache: Option<Result<Token, ParsingError>>,
    rules: RegexRuleSet,
    line: usize,
}

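/// Errors produced while tokenizing: either the input ran out, or no literal
/// pattern matched at the current position (the offending character is
/// carried in the variant).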
#[derive(Clone, Debug)]
pub enum ParsingError {
    EndOfFileError,
    UnrecognizedPatternError(String),
}

#[allow(dead_code)]
impl Lexer {
    /// Builds a lexer from a JSON rule file and a source string. Panics if
    /// the JSON or any contained pattern is invalid.
    pub fn from(json: String, source: String) -> Self {
        Self {
            source,
            last_token: None,
            cache: None,
            rules: RegexRuleSet::from_string(json),
            line: 0,
        }
    }

    /// Builds a lexer directly from pattern strings, skipping the JSON step.
    pub fn from_args(literals: HashMap<String, String>, whitespace: String, source: String) -> Self {
        Self {
            source,
            last_token: None,
            cache: None,
            rules: RegexRuleSet::from(RuleSet { literals, whitespace }),
            line: 0,
        }
    }

    /// First character of the remaining source. Panics on empty input.
    fn ch(&self) -> char {
        self.source.chars().next().unwrap()
    }

    /// Consumes a leading whitespace match, counting newlines as they pass.
    fn skip_whitespace(&mut self) {
        // Copy the match span out so the borrow of `source` ends before the
        // mutation below. Offsets are bytes; `String::remove` works on chars,
        // so this assumes the whitespace pattern only matches ASCII.
        let span = self.rules.whitespace.find(&self.source).map(|m| (m.start(), m.end()));
        if let Some((0, end)) = span {
            for _ in 0..end {
                if self.source.remove(0) == '\n' {
                    self.line += 1;
                }
            }
        }
    }

    /// True once the whole source has been consumed.
    pub fn done(&self) -> bool {
        self.source.is_empty()
    }

    /// Removes and returns the next character, counting newlines.
    fn get(&mut self) -> char {
        let c = self.source.remove(0);
        if c == '\n' {
            self.line += 1;
        }
        c
    }

    /// Scans every literal pattern against the start of the remaining source
    /// and consumes the longest match into a token (longest-match wins).
    fn parse_next(&mut self) -> Result<Token, ParsingError> {
        self.skip_whitespace();
        if self.done() {
            return Err(ParsingError::EndOfFileError);
        }
        // Track the best (longest) rule matching at position 0.
        let mut best: Option<(String, usize)> = None;
        for (lit_type, pat) in &self.rules.literals {
            if let Some(m) = pat.find(&self.source) {
                if m.start() == 0 && best.as_ref().map_or(true, |b| m.end() > b.1) {
                    best = Some((lit_type.clone(), m.end()));
                }
            }
        }
        let (name, end) = match best {
            Some(b) => b,
            // No rule matched here: report (and consume) the offending char.
            None => return Err(ParsingError::UnrecognizedPatternError(String::from(self.get()))),
        };
        // Same byte-offset caveat as in `skip_whitespace`.
        let mut lexeme = String::new();
        for _ in 0..end {
            lexeme.push(self.get());
        }
        Ok(Token { token_type: name, value: lexeme, line: self.line })
    }

    /// Returns the next token, draining a previously peeked token first.
    pub fn next_token(&mut self) -> Result<Token, ParsingError> {
        let token = match self.cache.take() {
            Some(token) => token,
            None => self.parse_next(),
        };
        self.last_token = Some(token.clone());
        token
    }

    /// Like `next_token`, but panics on a parsing error.
    pub fn next_token_x(&mut self) -> Token {
        self.next_token().unwrap()
    }

    /// The most recently returned token, if any.
    pub fn current_token(&self) -> Option<Result<Token, ParsingError>> {
        self.last_token.clone()
    }

    /// Like `current_token`, but panics if no token has been read yet or the
    /// last read failed.
    pub fn current_token_x(&self) -> Token {
        self.current_token().unwrap().unwrap()
    }

    /// Reads the next token and caches it, so the following `next_token` call
    /// returns it again. Note this also updates `current_token`.
    pub fn peek_next_token(&mut self) -> Option<Result<Token, ParsingError>> {
        self.cache = Some(self.next_token());
        self.cache.clone()
    }

    /// Like `peek_next_token`, but panics on a parsing error.
    pub fn peek_next_token_x(&mut self) -> Token {
        self.peek_next_token().unwrap().unwrap()
    }
}
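
// A minimal usage sketch under assumed rule patterns: the "ident"/"number"
// names and the regexes below are illustrative, not part of any rule file
// shipped with this crate.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tokenizes_identifiers_and_numbers() {
        let mut literals = HashMap::new();
        literals.insert("ident".to_string(), r"^[A-Za-z_][A-Za-z0-9_]*".to_string());
        literals.insert("number".to_string(), r"^[0-9]+".to_string());
        let mut lexer = Lexer::from_args(literals, r"^\s+".to_string(), "foo 42".to_string());

        let t = lexer.next_token_x();
        assert_eq!(t.token_type, "ident");
        assert_eq!(t.value, "foo");

        // Peeking returns the token without logically consuming it.
        assert_eq!(lexer.peek_next_token_x().value, "42");
        let t = lexer.next_token_x();
        assert_eq!(t.token_type, "number");
        assert_eq!(t.value, "42");

        assert!(lexer.done());
    }
}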