hl_core/lexers/
java.rs

1// ---- DON'T EDIT! THIS IS AUTO GENERATED CODE ---- //
2use crate::lexers::Token;
3
/// Character cursor over the source text being tokenized.
///
/// `Debug` and `Clone` are derived so a lexer can be printed while
/// diagnosing tokenization problems and snapshotted for look-ahead.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// Source text, one `char` per element.
    input: Vec<char>,
    /// Index in `input` of the character currently held in `ch`.
    pub position: usize,
    /// Index in `input` of the character the next `read_char` call loads.
    pub read_position: usize,
    /// Character under the cursor; `'\0'` when the cursor is past the end.
    pub ch: char,
}
10
/// Returns `true` when `ch` is an underscore or any alphabetic character.
fn is_letter(ch: char) -> bool {
    matches!(ch, '_') || ch.is_alphabetic()
}
14
15impl Lexer {
16    pub fn new(input: Vec<char>) -> Self {
17        Self {
18            input,
19            position: 0,
20            read_position: 0,
21            ch: '\0',
22        }
23    }
24
25    pub fn read_char(&mut self) {
26        if self.read_position >= self.input.len() {
27            self.ch = '\0';
28        } else {
29            self.ch = self.input[self.read_position];
30        }
31        self.position = self.read_position;
32        self.read_position += 1;
33    }
34
35    pub fn next_token(&mut self) -> Token {
36        let read_identifier = |l: &mut Lexer| -> Vec<char> {
37            let position = l.position;
38            while l.position < l.input.len() && is_letter(l.ch) {
39                l.read_char();
40            }
41            l.input[position..l.position].to_vec()
42        };
43
44        let read_string = |l: &mut Lexer, ch: char| -> Vec<char> {
45            let position = l.position;
46            l.read_char();
47            while l.position < l.input.len() && l.ch != ch {
48                if l.ch == '\\' {
49                    l.read_char();
50                }
51                l.read_char();
52            }
53            l.read_char();
54            if l.position > l.input.len() {
55                l.position -= 1;
56                l.read_position -= 1;
57            }
58            l.input[position..l.position].to_vec()
59        };
60
61        let read_number = |l: &mut Lexer| -> Vec<char> {
62            let position = l.position;
63            while l.position < l.input.len() && l.ch.is_numeric() {
64                l.read_char();
65            }
66            l.input[position..l.position].to_vec()
67        };
68
69        let tok: Token;
70        if self.ch == '/' {
71            let next_id = String::from("/*").chars().collect::<Vec<_>>();
72            let next_position = self.position + next_id.len();
73            let end_id = String::from("*/").chars().collect::<Vec<_>>();
74            if self.position + next_id.len() < self.input.len()
75                && self.input[self.position..next_position] == next_id
76            {
77                let mut identifier = next_id.clone();
78                next_id.iter().for_each(|_| self.read_char());
79                let start_position = self.position;
80                while self.position < self.input.len() {
81                    if self.ch == '*' {
82                        let end_position = self.position + end_id.len();
83                        if end_position <= self.input.len()
84                            && self.input[self.position..end_position] == end_id
85                        {
86                            end_id.to_owned().iter().for_each(|_| self.read_char());
87                            break;
88                        }
89                    }
90                    self.read_char();
91                }
92                identifier.append(&mut self.input[start_position..self.position].to_vec());
93                return Token::COMMENT(identifier);
94            }
95        }
96        if self.read_position < self.input.len()
97            && self.ch == '='
98            && self.input[self.read_position] == '='
99        {
100            self.read_char();
101            self.read_char();
102            return Token::KEYWORD(vec!['=', '=']);
103        }
104
105        if self.read_position < self.input.len()
106            && self.ch == '='
107            && self.input[self.read_position] == '>'
108        {
109            self.read_char();
110            self.read_char();
111            return Token::KEYWORD(vec!['=', '>']);
112        }
113
114        if self.read_position < self.input.len()
115            && self.ch == '&'
116            && self.input[self.read_position] == '&'
117        {
118            self.read_char();
119            self.read_char();
120            return Token::KEYWORD(vec!['&', '&']);
121        }
122
123        if self.read_position < self.input.len()
124            && self.ch == '?'
125            && self.input[self.read_position] == '?'
126        {
127            self.read_char();
128            self.read_char();
129            return Token::KEYWORD(vec!['?', '?']);
130        }
131
132        if self.read_position < self.input.len()
133            && self.ch == '!'
134            && self.input[self.read_position] == '='
135        {
136            self.read_char();
137            self.read_char();
138            return Token::KEYWORD(vec!['!', '=']);
139        }
140
141        if self.read_position < self.input.len()
142            && self.ch == '/'
143            && self.input[self.read_position] == '/'
144        {
145            return Token::COMMENT(read_string(self, '\n'));
146        }
147
148        if self.read_position < self.input.len()
149            && self.ch == '='
150            && self.input[self.read_position] == '='
151        {
152            self.read_char();
153            self.read_char();
154            return Token::KEYWORD(vec!['=', '=']);
155        }
156
157        if self.read_position < self.input.len()
158            && self.ch == '='
159            && self.input[self.read_position] == '>'
160        {
161            self.read_char();
162            self.read_char();
163            return Token::KEYWORD(vec!['=', '>']);
164        }
165
166        if self.read_position < self.input.len()
167            && self.ch == '&'
168            && self.input[self.read_position] == '&'
169        {
170            self.read_char();
171            self.read_char();
172            return Token::KEYWORD(vec!['&', '&']);
173        }
174
175        if self.read_position < self.input.len()
176            && self.ch == '?'
177            && self.input[self.read_position] == '?'
178        {
179            self.read_char();
180            self.read_char();
181            return Token::KEYWORD(vec!['?', '?']);
182        }
183
184        if self.read_position < self.input.len()
185            && self.ch == '!'
186            && self.input[self.read_position] == '='
187        {
188            self.read_char();
189            self.read_char();
190            return Token::KEYWORD(vec!['!', '=']);
191        }
192
193        match self.ch {
194            '\n' => {
195                tok = Token::ENDL(self.ch);
196            }
197            '\0' => {
198                tok = Token::EOF;
199            }
200            '0' => {
201                return if self.input[self.read_position] == 'x' {
202                    let start_position = self.position;
203                    self.read_char();
204                    self.read_char();
205                    while self.position < self.input.len()
206                        && (self.ch.is_numeric() || is_letter(self.ch))
207                    {
208                        self.read_char();
209                    }
210                    let hexadecimal = &self.input[start_position..self.position];
211                    Token::INT(hexadecimal.to_vec())
212                } else {
213                    let number = read_number(self);
214                    Token::INT(number)
215                }
216            }
217            '&' => {
218                tok = Token::KEYWORD(vec![self.ch]);
219            }
220            '-' => {
221                tok = Token::KEYWORD(vec![self.ch]);
222            }
223            '>' => {
224                tok = Token::KEYWORD(vec![self.ch]);
225            }
226            '<' => {
227                tok = Token::KEYWORD(vec![self.ch]);
228            }
229            '|' => {
230                tok = Token::KEYWORD(vec![self.ch]);
231            }
232            '!' => {
233                tok = Token::KEYWORD(vec![self.ch]);
234            }
235            '=' => {
236                tok = Token::KEYWORD(vec![self.ch]);
237            }
238            '*' => {
239                tok = Token::KEYWORD(vec![self.ch]);
240            }
241            '%' => {
242                tok = Token::KEYWORD(vec![self.ch]);
243            }
244            '?' => {
245                tok = Token::KEYWORD(vec![self.ch]);
246            }
247            '@' => {
248                if is_letter(self.input[self.position + 1]) {
249                    let mut identifier = vec![self.ch];
250                    self.read_char();
251                    identifier.append(&mut read_identifier(self));
252                    return Token::KEYWORD(identifier);
253                }
254                tok = Token::CH(self.ch);
255            }
256            _ => {
257                return if is_letter(self.ch) {
258                    #[allow(unused_variables)]
259                    let start_position = self.position;
260                    #[allow(unused_mut)]
261                    let mut identifier: Vec<char> = read_identifier(self);
262                    if self.ch.is_numeric() {
263                        let position = self.position;
264                        while self.position < self.input.len() {
265                            if !self.ch.is_numeric() && !is_letter(self.ch) {
266                                break;
267                            }
268                            self.read_char();
269                        }
270                        identifier.append(&mut self.input[position..self.position].to_vec());
271                    }
272                    match get_keyword_token(&identifier) {
273                        Ok(keyword_token) => keyword_token,
274                        Err(_) => {
275                            if self.ch == '(' {
276                                return Token::ENTITY(identifier);
277                            } else if self.ch.is_whitespace() {
278                                let mut position = self.position;
279                                let mut ch = self.input[position];
280                                while position < self.input.len() && ch.is_whitespace() {
281                                    position += 1;
282                                    if position < self.input.len() {
283                                        ch = self.input[position];
284                                    }
285                                }
286                                if ch == '(' {
287                                    return Token::ENTITY(identifier);
288                                }
289                            }
290                            Token::IDENT(identifier)
291                        }
292                    }
293                } else if self.ch.is_numeric() {
294                    let mut identifier: Vec<char> = read_number(self);
295                    if self.ch == 'f' {
296                        identifier.append(&mut vec![self.ch]);
297                        self.read_char();
298                    }
299                    Token::INT(identifier)
300                } else if self.ch == '\'' {
301                    let str_value: Vec<char> = read_string(self, '\'');
302                    Token::STRING(str_value)
303                } else if self.ch == '"' {
304                    let str_value: Vec<char> = read_string(self, '"');
305                    Token::STRING(str_value)
306                } else {
307                    Token::ILLEGAL
308                }
309            }
310        }
311        self.read_char();
312        tok
313    }
314}
315
316pub fn get_keyword_token(identifier: &Vec<char>) -> Result<Token, String> {
317    let id: String = identifier.into_iter().collect();
318    match &id[..] {
319        "this" | "true" | "false" | "super" | "null" | "String" | "Long" | "Object" | "Boolean"
320        | "Array" | "List" | "ArrayList" | "Arrays" | "Map" | "HashMap" | "LinkedHashSet" => {
321            Ok(Token::CONSTANT(identifier.clone()))
322        }
323        "abstract" | "byte" | "break" | "class" | "double" | "float" | "final" | "int"
324        | "interface" | "char" | "case" | "default" | "short" | "for" | "package" | "import"
325        | "public" | "private" | "protected" | "extends" | "static" | "void" | "return" | "new"
326        | "if" | "else" | "enum" | "instanceof" | "boolean" | "assert" | "continue" | "native"
327        | "switch" | "synchronized" | "try" | "throw" | "catch" | "volatile" | "while"
328        | "throws" | "finally" | "long" | "do" | "transient" | "strictfp" | "var" => {
329            Ok(Token::KEYWORD(identifier.clone()))
330        }
331        _ => Err(String::from("Not a keyword")),
332    }
333}
334
335pub fn render_html(input: Vec<char>) -> String {
336    let mut l = Lexer::new(input);
337    l.read_char();
338    let mut html = String::new();
339    let mut line = 1;
340    html.push_str("<table class=\"highlight-table\">\n");
341    html.push_str("<tbody>\n");
342    html.push_str("<tr>");
343    html.push_str(&format!(
344        "<td class=\"hl-num\" data-line=\"{}\"></td><td>",
345        line
346    ));
347
348    loop {
349        let token = l.next_token();
350        if token == Token::EOF {
351            html.push_str("</td></tr>\n");
352            break;
353        }
354
355        match token {
356            Token::INT(value) => {
357                html.push_str(&format!(
358                    "<span class=\"hl-c\">{}</span>",
359                    value.iter().collect::<String>()
360                ));
361            }
362            Token::IDENT(value) => {
363                html.push_str(&value.iter().collect::<String>());
364            }
365            Token::STRING(value) => {
366                let mut s = String::new();
367                for ch in value {
368                    if ch == '<' {
369                        s.push_str("&lt;");
370                    } else if ch == '>' {
371                        s.push_str("&gt;");
372                    } else {
373                        s.push(ch);
374                    }
375                }
376                html.push_str(&format!("<span class=\"hl-s\">{}</span>", s));
377            }
378            Token::CH(value) => {
379                html.push(value);
380            }
381            Token::ENTITY(value) => {
382                html.push_str(&format!(
383                    "<span class=\"hl-en\">{}</span>",
384                    value.iter().collect::<String>()
385                ));
386            }
387            Token::CONSTANT(value) => {
388                html.push_str(&format!(
389                    "<span class=\"hl-c\">{}</span>",
390                    value.iter().collect::<String>()
391                ));
392            }
393            Token::KEYWORD(value) => {
394                html.push_str(&format!(
395                    "<span class=\"hl-k\">{}</span>",
396                    value.iter().collect::<String>()
397                ));
398            }
399            Token::COMMENT(value) => {
400                let mut lines = String::new();
401                for ch in value {
402                    if ch == '<' {
403                        lines.push_str("&lt;");
404                    } else if ch == '>' {
405                        lines.push_str("&gt;");
406                    } else {
407                        lines.push(ch);
408                    }
409                }
410                let split = lines.split("\n");
411                let split_len = split.clone().collect::<Vec<&str>>().len();
412                let mut index = 0;
413                for val in split {
414                    if val.len() > 1 {
415                        html.push_str(&format!("<span class=\"hl-cmt\">{}</span>", val));
416                    }
417                    index = index + 1;
418                    if index != split_len {
419                        line = line + 1;
420                        html.push_str("</td></tr>\n");
421                        html.push_str(&format!(
422                            "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
423                            line
424                        ));
425                    }
426                }
427            }
428            Token::ENDL(_) => {
429                line = line + 1;
430                html.push_str("</td></tr>\n");
431                html.push_str(&format!(
432                    "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
433                    line
434                ));
435            }
436            _ => {
437                html.push(l.ch);
438                l.read_char();
439            }
440        }
441    }
442
443    html.push_str("</tbody>\n");
444    html.push_str("</table>");
445    html
446}