use std::fs;

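/// A source position as `(line, column)`, both 1-based.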
type Loc = (usize, usize);

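/// A lexed token; every variant carries the `Loc` at which it was emitted.
/// `Section` holds the section name followed by the delimited contents.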
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    Keyword(String, Loc),
    Section(String, String, Loc),
    Integer(usize, Loc),
    Float(f64, Loc),
    Symbol(char, String, Loc),
    Ident(String, Loc),
}

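/// Internal query passed to `Lexer::is_section`: either a candidate section
/// start, or a `(start, end)` pair to match against a complete section.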
#[derive(Debug)]
enum Value {
    Start(String),
    End(String, String),
}

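/// Result of a section lookup: the possible end delimiters for a matched
/// start, or a reference to the fully matched `Section`.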
enum StartOrSection<'a> {
    Start(Vec<String>),
    Section(&'a Section),
}

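/// Tokenizer state: inside a delimited section, or in ordinary input.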
#[derive(PartialEq, Eq)]
enum Mode {
    Section,
    Normal,
}

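/// A configurable lexer: keywords, delimited sections (e.g. string literals),
/// single-character symbols, and a raw byte buffer to tokenize.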
#[derive(Debug, Clone)]
pub struct Lexer {
    pub keywords: Vec<String>,
    pub sections: Vec<Section>,
    pub symbols: Vec<(char, String)>,
    pub buffer: Vec<u8>,
    pub allow_whitespace: bool,
}

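/// A delimited region of input, such as a quoted string: a name plus its
/// start and end delimiters.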
#[derive(Debug, Clone)]
pub struct Section {
    pub name: String,
    pub start: String,
    pub end: String,
}

impl Token {
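    /// Renders the token's payload back to a `String`. For `Section` this is
    /// the inner contents, without the delimiters.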
    pub fn as_string(&self) -> String {
        return match self {
            Token::Keyword(keyword, _) => keyword.clone(),
            Token::Section(_, value, _) => value.clone(),
            Token::Integer(integer, _) => integer.to_string(),
            Token::Float(float, _) => float.to_string(),
            Token::Symbol(value, _, _) => value.to_string(),
            Token::Ident(ident, _) => ident.clone(),
        };
    }

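    /// Returns the token's source location.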
    pub fn loc(&self) -> Loc {
        return match self {
            Token::Keyword(_, loc) => *loc,
            Token::Section(_, _, loc) => *loc,
            Token::Integer(_, loc) => *loc,
            Token::Float(_, loc) => *loc,
            Token::Symbol(_, _, loc) => *loc,
            Token::Ident(_, loc) => *loc,
        };
    }

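    /// Asserts that this token is the given keyword. Like the other `is_*`
    /// helpers below, it returns a descriptive error on mismatch so a parser
    /// built on this lexer can use `?` directly.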
    pub fn is_keyword(&self, keyword: &str) -> Result<(), Box<dyn std::error::Error>> {
        if let Token::Keyword(value, _) = self {
            if value == keyword {
                return Ok(());
            }
        }
        return Err(format!("expected keyword: {:?}", self).into());
    }

    pub fn is_section(&self, name: &str) -> Result<String, Box<dyn std::error::Error>> {
        if let Token::Section(s_name, value, _) = self {
            if name == s_name {
                return Ok(value.clone());
            }
        }
        return Err(format!("expected section: {:?}", self).into());
    }

    pub fn is_ident(&self) -> Result<String, Box<dyn std::error::Error>> {
        if let Token::Ident(value, _) = self {
            return Ok(value.clone());
        }
        return Err(format!("expected ident: {:?}", self).into());
    }

    pub fn is_integer(&self) -> Result<usize, Box<dyn std::error::Error>> {
        if let Token::Integer(integer, _) = self {
            return Ok(*integer);
        }
        return Err(format!("expected integer: {:?}", self).into());
    }

    pub fn is_float(&self) -> Result<f64, Box<dyn std::error::Error>> {
        if let Token::Float(float, _) = self {
            return Ok(*float);
        }
        return Err(format!("expected float: {:?}", self).into());
    }

    pub fn is_symbol(&self, name: &str) -> Result<(), Box<dyn std::error::Error>> {
        if let Token::Symbol(_, s_name, _) = self {
            if s_name == name {
                return Ok(());
            }
        }
        return Err(format!("expected symbol: {:?}", self).into());
    }
}

impl Section {
    pub fn new(name: &str, start: &str, end: &str) -> Section {
        return Section {
            name: name.to_string(),
            start: start.to_string(),
            end: end.to_string(),
        };
    }

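    /// Builds a placeholder `Section` from an end delimiter only; the caller
    /// fills in `start` afterwards (see `tokenize`), and `name` stays empty.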
    pub fn from_end(end: String) -> Section {
        return Section {
            name: String::new(),
            start: String::new(),
            end,
        };
    }
}

impl Lexer {
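    /// Creates a lexer from its configuration. `allow_whitespace` controls
    /// whether runs of spaces are emitted as `Ident(" ")` tokens.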
    pub fn new(keywords: &[String], sections: &[Section], symbols: &[(char, String)], allow_whitespace: bool) -> Lexer {
        return Lexer {
            keywords: keywords.to_vec(),
            sections: sections.to_vec(),
            symbols: symbols.to_vec(),
            buffer: Vec::new(),
            allow_whitespace,
        };
    }

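    /// Replaces the buffer with the bytes of `string`.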
    pub fn load_str(&mut self, string: &str) {
        self.buffer = string.as_bytes().to_vec();
    }

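    /// Replaces the buffer with the contents of `filename`.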
    pub fn load_file(&mut self, filename: &str) -> Result<(), Box<dyn std::error::Error>> {
        self.buffer = fs::read(filename)?;
        return Ok(());
    }

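    /// Looks up `value` in the symbol table, returning the symbol's name.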
    fn symbols_contain(&self, value: &char) -> Option<&str> {
        for symbol in &self.symbols {
            if symbol.0 == *value {
                return Some(&symbol.1);
            }
        }
        return None;
    }

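    /// Returns the name of the section with the given delimiters, if any.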
    fn section_exists(&self, start: &str, end: &str) -> Result<String, ()> {
        for section in &self.sections {
            if section.start == start && section.end == end {
                return Ok(section.name.to_string());
            }
        }
        return Err(());
    }

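    /// Matches `value` against the configured sections: a `Start` yields the
    /// end delimiters of every section opened by it, and an `End` pair yields
    /// the matching section itself.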
    fn is_section(&self, value: Value) -> Result<StartOrSection<'_>, ()> {
        let mut matches: Vec<String> = Vec::new();
        for section in &self.sections {
            if let Value::Start(start) = &value {
                if &section.start == start {
                    matches.push(section.end.clone());
                }
            } else if let Value::End(start, end) = &value {
                if &section.end == end && &section.start == start {
                    return Ok(StartOrSection::Section(section));
                }
            }
        }

        if !matches.is_empty() {
            return Ok(StartOrSection::Start(matches));
        }
        return Err(());
    }

    fn is_numeric(&self, token: &String, loc: Loc) -> Token {
        if let Ok(integer) = token.parse::<usize>() {
            return Token::Integer(integer, loc);
        } else if let Ok(float) = token.parse::<f64>() {
            return Token::Float(float, loc);
        } else {
            return Token::Ident(token.clone(), loc);
        }
    }

    fn lex_token(&self, token: &String, loc: Loc) -> Option<Token> {
        if token == "\n" {
            return None;
        } else if token.is_empty() {
            if self.allow_whitespace {
                return Some(Token::Ident(" ".to_string(), loc));
            } else {
                return None;
            }
        } else if self.keywords.contains(token) {
            return Some(Token::Keyword(token.clone(), loc));
        } else if token.len() == 1 {
            let character = token.chars().next().unwrap();
            if let Some(symbol_name) = self.symbols_contain(&character) {
                return Some(Token::Symbol(character, symbol_name.to_string(), loc));
            } else {
                return Some(self.is_numeric(token, loc));
            }
        } else if let Ok(name) = self.section_exists(&token[0..1], &token[token.len() - 1..]) {
            return Some(Token::Section(name, token[1..token.len() - 1].to_string(), loc));
        } else {
            return Some(self.is_numeric(token, loc));
        }
    }

    pub fn tokenize(&mut self) -> Result<Vec<Token>, Box<dyn std::error::Error>> {
        if self.symbols_contain(&' ').is_none() {
            self.symbols.push((' ', "Space".to_string()));
        }

        let mut mode = Mode::Normal;
        let mut token = String::new();
        let mut tokens: Vec<Token> = Vec::new();
        let mut section: Vec<Section> = Vec::new();
        let mut loc = (1, 1);

        let mut index = 0;
        while index < self.buffer.len() {
            let byte = self.buffer[index];
            let character = String::from_utf8(vec![byte])?;
            if mode == Mode::Normal {
                if let Ok(StartOrSection::Start(ends)) = self.is_section(Value::Start(character.clone())) {
                    // A section opens here; remember every end delimiter it
                    // could close with.
                    token = token + &character;
                    for end in ends {
                        let mut candidate = Section::from_end(end);
                        candidate.start = character.clone();
                        section.push(candidate);
                    }
                    mode = Mode::Section;
                } else if character == "\n" {
                    if let Some(t) = self.lex_token(&token, loc) {
                        tokens.push(t);
                    }
                    token = String::new();
                } else if character != " " {
                    token = token + &character;
                }

                // Flush when this byte or the next one is a symbol, so that
                // symbols always become tokens of their own. The lookahead is
                // guarded so the final byte is still processed.
                let next_is_symbol = index + 1 < self.buffer.len()
                    && self.symbols_contain(&char::from(self.buffer[index + 1])).is_some();
                if (self.symbols_contain(&char::from(byte)).is_some() || next_is_symbol)
                    && section.is_empty()
                {
                    if let Some(t) = self.lex_token(&token, loc) {
                        tokens.push(t);
                    }
                    token = String::new();
                }
            } else if mode == Mode::Section {
                if character == "\\" {
                    // Escape: take the next byte verbatim; a trailing
                    // backslash at end of input stops the lexer early.
                    if index + 1 >= self.buffer.len() {
                        return Ok(tokens);
                    } else {
                        index += 1;
                        token = token + &(self.buffer[index] as char).to_string();
                    }
                } else if self.is_section(Value::End(section[0].start.to_string(), character.clone())).is_ok()
                    || index + 1 >= self.buffer.len()
                {
                    // Section closed, or the buffer ended mid-section.
                    token = token + &character;
                    if let Some(t) = self.lex_token(&token, loc) {
                        tokens.push(t);
                    }
                    section = Vec::new();
                    token = String::new();
                    mode = Mode::Normal;
                } else {
                    token = token + &character;
                }
            }

            if character == "\n" {
                loc.0 += 1;
                loc.1 = 1;
            } else {
                loc.1 += 1;
            }
            index += 1;
        }

        return Ok(tokens);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn load_test() -> Result<(), Box<dyn std::error::Error>> {
        let mut lexer = Lexer::new(
            &["def".to_string(), "if".to_string(), "return".to_string()],
            &[Section::new("string", "\"", "\"")],
            &[(':', "column".to_string()), ('(', "openbrace".to_string()), (')', "closebrace".to_string()), (' ', "space".to_string())],
            true,
        );

        lexer.load_str("\" ");

        println!("tokens: {:?}", lexer.tokenize()?);
        return Ok(());
    }
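
    // A minimal usage sketch, not a fixture from the original project: the
    // input string and the assertions below are assumptions. It lexes a line
    // containing a keyword, an identifier, three symbols, and one "string"
    // section, and checks each token's kind via the is_* helpers.
    #[test]
    fn tokenize_line_test() -> Result<(), Box<dyn std::error::Error>> {
        let mut lexer = Lexer::new(
            &["def".to_string(), "if".to_string(), "return".to_string()],
            &[Section::new("string", "\"", "\"")],
            &[(':', "column".to_string()), ('(', "openbrace".to_string()), (')', "closebrace".to_string())],
            false,
        );

        lexer.load_str("def greet(): \"hi\"\n");

        let tokens = lexer.tokenize()?;
        assert_eq!(tokens.len(), 6);
        tokens[0].is_keyword("def")?;
        assert_eq!(tokens[1].is_ident()?, "greet");
        tokens[2].is_symbol("openbrace")?;
        tokens[3].is_symbol("closebrace")?;
        tokens[4].is_symbol("column")?;
        assert_eq!(tokens[5].is_section("string")?, "hi");
        return Ok(());
    }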
}