1use crate::lexers::Token;
3
/// Character-level cursor over a source text that has been split into `char`s.
///
/// The cursor fields are public so that callers can inspect the current
/// character and advance manually when needed.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The complete source text being scanned.
    input: Vec<char>,
    /// Index of the character currently held in `ch`.
    pub position: usize,
    /// Index of the character the next `read_char` call will load.
    pub read_position: usize,
    /// Character under examination; `'\0'` once the input is exhausted.
    pub ch: char,
}
10
/// Returns `true` when `ch` may appear in an identifier: an underscore or
/// any character Unicode classifies as alphabetic.
fn is_letter(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_alphabetic(),
    }
}
14
15impl Lexer {
16 pub fn new(input: Vec<char>) -> Self {
17 Self {
18 input,
19 position: 0,
20 read_position: 0,
21 ch: '\0',
22 }
23 }
24
25 pub fn read_char(&mut self) {
26 if self.read_position >= self.input.len() {
27 self.ch = '\0';
28 } else {
29 self.ch = self.input[self.read_position];
30 }
31 self.position = self.read_position;
32 self.read_position += 1;
33 }
34
35 pub fn next_token(&mut self) -> Token {
36 let read_identifier = |l: &mut Lexer| -> Vec<char> {
37 let position = l.position;
38 while l.position < l.input.len() && is_letter(l.ch) {
39 l.read_char();
40 }
41 l.input[position..l.position].to_vec()
42 };
43
44 let read_string = |l: &mut Lexer, ch: char| -> Vec<char> {
45 let position = l.position;
46 l.read_char();
47 while l.position < l.input.len() && l.ch != ch {
48 if l.ch == '\\' {
49 l.read_char();
50 }
51 l.read_char();
52 }
53 l.read_char();
54 if l.position > l.input.len() {
55 l.position -= 1;
56 l.read_position -= 1;
57 }
58 l.input[position..l.position].to_vec()
59 };
60
61 let read_number = |l: &mut Lexer| -> Vec<char> {
62 let position = l.position;
63 while l.position < l.input.len() && l.ch.is_numeric() {
64 l.read_char();
65 }
66 l.input[position..l.position].to_vec()
67 };
68
69 let tok: Token;
70 if self.ch == '/' {
71 let next_id = String::from("/*").chars().collect::<Vec<_>>();
72 let next_position = self.position + next_id.len();
73 let end_id = String::from("*/").chars().collect::<Vec<_>>();
74 if self.position + next_id.len() < self.input.len()
75 && self.input[self.position..next_position] == next_id
76 {
77 let mut identifier = next_id.clone();
78 next_id.iter().for_each(|_| self.read_char());
79 let start_position = self.position;
80 while self.position < self.input.len() {
81 if self.ch == '*' {
82 let end_position = self.position + end_id.len();
83 if end_position <= self.input.len()
84 && self.input[self.position..end_position] == end_id
85 {
86 end_id.to_owned().iter().for_each(|_| self.read_char());
87 break;
88 }
89 }
90 self.read_char();
91 }
92 identifier.append(&mut self.input[start_position..self.position].to_vec());
93 return Token::COMMENT(identifier);
94 }
95 }
96 if self.read_position < self.input.len()
97 && self.ch == '='
98 && self.input[self.read_position] == '='
99 {
100 self.read_char();
101 self.read_char();
102 return Token::KEYWORD(vec!['=', '=']);
103 }
104
105 if self.read_position < self.input.len()
106 && self.ch == '='
107 && self.input[self.read_position] == '>'
108 {
109 self.read_char();
110 self.read_char();
111 return Token::KEYWORD(vec!['=', '>']);
112 }
113
114 if self.read_position < self.input.len()
115 && self.ch == '&'
116 && self.input[self.read_position] == '&'
117 {
118 self.read_char();
119 self.read_char();
120 return Token::KEYWORD(vec!['&', '&']);
121 }
122
123 if self.read_position < self.input.len()
124 && self.ch == '?'
125 && self.input[self.read_position] == '?'
126 {
127 self.read_char();
128 self.read_char();
129 return Token::KEYWORD(vec!['?', '?']);
130 }
131
132 if self.read_position < self.input.len()
133 && self.ch == '!'
134 && self.input[self.read_position] == '='
135 {
136 self.read_char();
137 self.read_char();
138 return Token::KEYWORD(vec!['!', '=']);
139 }
140
141 if self.read_position < self.input.len()
142 && self.ch == '/'
143 && self.input[self.read_position] == '/'
144 {
145 return Token::COMMENT(read_string(self, '\n'));
146 }
147
148 if self.read_position < self.input.len()
149 && self.ch == '='
150 && self.input[self.read_position] == '='
151 {
152 self.read_char();
153 self.read_char();
154 return Token::KEYWORD(vec!['=', '=']);
155 }
156
157 if self.read_position < self.input.len()
158 && self.ch == '='
159 && self.input[self.read_position] == '>'
160 {
161 self.read_char();
162 self.read_char();
163 return Token::KEYWORD(vec!['=', '>']);
164 }
165
166 if self.read_position < self.input.len()
167 && self.ch == '&'
168 && self.input[self.read_position] == '&'
169 {
170 self.read_char();
171 self.read_char();
172 return Token::KEYWORD(vec!['&', '&']);
173 }
174
175 if self.read_position < self.input.len()
176 && self.ch == '?'
177 && self.input[self.read_position] == '?'
178 {
179 self.read_char();
180 self.read_char();
181 return Token::KEYWORD(vec!['?', '?']);
182 }
183
184 if self.read_position < self.input.len()
185 && self.ch == '!'
186 && self.input[self.read_position] == '='
187 {
188 self.read_char();
189 self.read_char();
190 return Token::KEYWORD(vec!['!', '=']);
191 }
192
193 match self.ch {
194 '\n' => {
195 tok = Token::ENDL(self.ch);
196 }
197 '\0' => {
198 tok = Token::EOF;
199 }
200 '0' => {
201 return if self.input[self.read_position] == 'x' {
202 let start_position = self.position;
203 self.read_char();
204 self.read_char();
205 while self.position < self.input.len()
206 && (self.ch.is_numeric() || is_letter(self.ch))
207 {
208 self.read_char();
209 }
210 let hexadecimal = &self.input[start_position..self.position];
211 Token::INT(hexadecimal.to_vec())
212 } else {
213 let number = read_number(self);
214 Token::INT(number)
215 }
216 }
217 '&' => {
218 tok = Token::KEYWORD(vec![self.ch]);
219 }
220 '-' => {
221 tok = Token::KEYWORD(vec![self.ch]);
222 }
223 '>' => {
224 tok = Token::KEYWORD(vec![self.ch]);
225 }
226 '<' => {
227 tok = Token::KEYWORD(vec![self.ch]);
228 }
229 '|' => {
230 tok = Token::KEYWORD(vec![self.ch]);
231 }
232 '!' => {
233 tok = Token::KEYWORD(vec![self.ch]);
234 }
235 '=' => {
236 tok = Token::KEYWORD(vec![self.ch]);
237 }
238 '*' => {
239 tok = Token::KEYWORD(vec![self.ch]);
240 }
241 '%' => {
242 tok = Token::KEYWORD(vec![self.ch]);
243 }
244 '?' => {
245 tok = Token::KEYWORD(vec![self.ch]);
246 }
247 '@' => {
248 if is_letter(self.input[self.position + 1]) {
249 let mut identifier = vec![self.ch];
250 self.read_char();
251 identifier.append(&mut read_identifier(self));
252 return Token::KEYWORD(identifier);
253 }
254 tok = Token::CH(self.ch);
255 }
256 _ => {
257 return if is_letter(self.ch) {
258 #[allow(unused_variables)]
259 let start_position = self.position;
260 #[allow(unused_mut)]
261 let mut identifier: Vec<char> = read_identifier(self);
262 if self.ch.is_numeric() {
263 let position = self.position;
264 while self.position < self.input.len() {
265 if !self.ch.is_numeric() && !is_letter(self.ch) {
266 break;
267 }
268 self.read_char();
269 }
270 identifier.append(&mut self.input[position..self.position].to_vec());
271 }
272 match get_keyword_token(&identifier) {
273 Ok(keyword_token) => keyword_token,
274 Err(_) => {
275 if self.ch == '(' {
276 return Token::ENTITY(identifier);
277 } else if self.ch.is_whitespace() {
278 let mut position = self.position;
279 let mut ch = self.input[position];
280 while position < self.input.len() && ch.is_whitespace() {
281 position += 1;
282 if position < self.input.len() {
283 ch = self.input[position];
284 }
285 }
286 if ch == '(' {
287 return Token::ENTITY(identifier);
288 }
289 }
290 Token::IDENT(identifier)
291 }
292 }
293 } else if self.ch.is_numeric() {
294 let mut identifier: Vec<char> = read_number(self);
295 if self.ch == 'f' {
296 identifier.append(&mut vec![self.ch]);
297 self.read_char();
298 }
299 Token::INT(identifier)
300 } else if self.ch == '\'' {
301 let str_value: Vec<char> = read_string(self, '\'');
302 Token::STRING(str_value)
303 } else if self.ch == '"' {
304 let str_value: Vec<char> = read_string(self, '"');
305 Token::STRING(str_value)
306 } else {
307 Token::ILLEGAL
308 }
309 }
310 }
311 self.read_char();
312 tok
313 }
314}
315
316pub fn get_keyword_token(identifier: &Vec<char>) -> Result<Token, String> {
317 let id: String = identifier.into_iter().collect();
318 match &id[..] {
319 "this" | "true" | "false" | "super" | "null" | "String" | "Long" | "Object" | "Boolean"
320 | "Array" | "List" | "ArrayList" | "Arrays" | "Map" | "HashMap" | "LinkedHashSet" => {
321 Ok(Token::CONSTANT(identifier.clone()))
322 }
323 "abstract" | "byte" | "break" | "class" | "double" | "float" | "final" | "int"
324 | "interface" | "char" | "case" | "default" | "short" | "for" | "package" | "import"
325 | "public" | "private" | "protected" | "extends" | "static" | "void" | "return" | "new"
326 | "if" | "else" | "enum" | "instanceof" | "boolean" | "assert" | "continue" | "native"
327 | "switch" | "synchronized" | "try" | "throw" | "catch" | "volatile" | "while"
328 | "throws" | "finally" | "long" | "do" | "transient" | "strictfp" | "var" => {
329 Ok(Token::KEYWORD(identifier.clone()))
330 }
331 _ => Err(String::from("Not a keyword")),
332 }
333}
334
335pub fn render_html(input: Vec<char>) -> String {
336 let mut l = Lexer::new(input);
337 l.read_char();
338 let mut html = String::new();
339 let mut line = 1;
340 html.push_str("<table class=\"highlight-table\">\n");
341 html.push_str("<tbody>\n");
342 html.push_str("<tr>");
343 html.push_str(&format!(
344 "<td class=\"hl-num\" data-line=\"{}\"></td><td>",
345 line
346 ));
347
348 loop {
349 let token = l.next_token();
350 if token == Token::EOF {
351 html.push_str("</td></tr>\n");
352 break;
353 }
354
355 match token {
356 Token::INT(value) => {
357 html.push_str(&format!(
358 "<span class=\"hl-c\">{}</span>",
359 value.iter().collect::<String>()
360 ));
361 }
362 Token::IDENT(value) => {
363 html.push_str(&value.iter().collect::<String>());
364 }
365 Token::STRING(value) => {
366 let mut s = String::new();
367 for ch in value {
368 if ch == '<' {
369 s.push_str("<");
370 } else if ch == '>' {
371 s.push_str(">");
372 } else {
373 s.push(ch);
374 }
375 }
376 html.push_str(&format!("<span class=\"hl-s\">{}</span>", s));
377 }
378 Token::CH(value) => {
379 html.push(value);
380 }
381 Token::ENTITY(value) => {
382 html.push_str(&format!(
383 "<span class=\"hl-en\">{}</span>",
384 value.iter().collect::<String>()
385 ));
386 }
387 Token::CONSTANT(value) => {
388 html.push_str(&format!(
389 "<span class=\"hl-c\">{}</span>",
390 value.iter().collect::<String>()
391 ));
392 }
393 Token::KEYWORD(value) => {
394 html.push_str(&format!(
395 "<span class=\"hl-k\">{}</span>",
396 value.iter().collect::<String>()
397 ));
398 }
399 Token::COMMENT(value) => {
400 let mut lines = String::new();
401 for ch in value {
402 if ch == '<' {
403 lines.push_str("<");
404 } else if ch == '>' {
405 lines.push_str(">");
406 } else {
407 lines.push(ch);
408 }
409 }
410 let split = lines.split("\n");
411 let split_len = split.clone().collect::<Vec<&str>>().len();
412 let mut index = 0;
413 for val in split {
414 if val.len() > 1 {
415 html.push_str(&format!("<span class=\"hl-cmt\">{}</span>", val));
416 }
417 index = index + 1;
418 if index != split_len {
419 line = line + 1;
420 html.push_str("</td></tr>\n");
421 html.push_str(&format!(
422 "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
423 line
424 ));
425 }
426 }
427 }
428 Token::ENDL(_) => {
429 line = line + 1;
430 html.push_str("</td></tr>\n");
431 html.push_str(&format!(
432 "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
433 line
434 ));
435 }
436 _ => {
437 html.push(l.ch);
438 l.read_char();
439 }
440 }
441 }
442
443 html.push_str("</tbody>\n");
444 html.push_str("</table>");
445 html
446}