hl_core/lexers/
bash.rs

1// ---- DON'T EDIT! THIS IS AUTO GENERATED CODE ---- //
2use crate::lexers::Token;
3
/// Character-level cursor over the source text being highlighted.
///
/// The lexer keeps one character of state (`ch`) plus two indices into
/// `input`; all three are updated together by `Lexer::read_char`.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The complete source text, one `char` per element.
    input: Vec<char>,
    /// Index of the character currently held in `ch`.
    pub position: usize,
    /// Index of the character the next `read_char` call will load.
    pub read_position: usize,
    /// The character under the cursor; `'\0'` before the first read and
    /// once the cursor has moved past the end of `input`.
    pub ch: char,
}
10
/// Reports whether `ch` may appear in a word: an underscore or any
/// character Unicode classifies as alphabetic.
fn is_letter(ch: char) -> bool {
    if ch == '_' {
        return true;
    }
    ch.is_alphabetic()
}
14
15impl Lexer {
16    pub fn new(input: Vec<char>) -> Self {
17        Self {
18            input,
19            position: 0,
20            read_position: 0,
21            ch: '\0',
22        }
23    }
24
25    pub fn read_char(&mut self) {
26        if self.read_position >= self.input.len() {
27            self.ch = '\0';
28        } else {
29            self.ch = self.input[self.read_position];
30        }
31        self.position = self.read_position;
32        self.read_position += 1;
33    }
34
35    pub fn next_token(&mut self) -> Token {
36        let read_identifier = |l: &mut Lexer| -> Vec<char> {
37            let position = l.position;
38            while l.position < l.input.len() && is_letter(l.ch) {
39                l.read_char();
40                if l.ch == '-' {
41                    l.read_char();
42                }
43            }
44            l.input[position..l.position].to_vec()
45        };
46
47        let read_string = |l: &mut Lexer, ch: char| -> Vec<char> {
48            let position = l.position;
49            l.read_char();
50            while l.position < l.input.len() && l.ch != ch {
51                if l.ch == '\\' {
52                    l.read_char();
53                }
54                l.read_char();
55            }
56            l.read_char();
57            if l.position > l.input.len() {
58                l.position -= 1;
59                l.read_position -= 1;
60            }
61            l.input[position..l.position].to_vec()
62        };
63
64        let read_number = |l: &mut Lexer| -> Vec<char> {
65            let position = l.position;
66            while l.position < l.input.len() && l.ch.is_numeric() {
67                l.read_char();
68            }
69            l.input[position..l.position].to_vec()
70        };
71
72        let tok: Token;
73        if self.ch == '$' {
74            let start_position = self.position;
75            while self.position < self.input.len()
76                && !is_letter(self.ch)
77                && !self.ch.is_numeric()
78                && self.ch != '\n'
79                && self.ch != ' '
80            {
81                self.read_char();
82            }
83            let identifier = self.input[start_position..self.position].to_vec();
84            return Token::IDENT(identifier);
85        }
86
87        if self.ch == '<' {
88            let next_ch = self.input[self.position + 1];
89            if self.position + 5 < self.input.len()
90                && next_ch == '<'
91                && self.input[self.position + 2] == 'E'
92                && self.input[self.position + 3] == 'O'
93                && self.input[self.position + 4] == 'F'
94            {
95                let mut comment = String::from("<<EOF").chars().collect::<Vec<_>>();
96                self.read_char();
97                self.read_char();
98                self.read_char();
99                self.read_char();
100                self.read_char();
101                let last_position = self.position;
102                while self.position < self.input.len() {
103                    if self.ch == 'E' {
104                        if self.input[self.position + 1] == 'O' {
105                            if self.input[self.position + 2] == 'F' {
106                                self.read_char();
107                                self.read_char();
108                                self.read_char();
109                                break;
110                            }
111                        }
112                    }
113                    self.read_char();
114                }
115                comment.append(&mut self.input[last_position..self.position].to_vec());
116                return Token::STRING(comment);
117            }
118        }
119        if self.ch == '#' {
120            return Token::COMMENT(read_string(self, '\n'));
121        }
122
123        match self.ch {
124            '\n' => {
125                tok = Token::ENDL(self.ch);
126            }
127            '\0' => {
128                tok = Token::EOF;
129            }
130            '!' => {
131                tok = Token::KEYWORD(vec![self.ch]);
132            }
133            '[' => {
134                tok = Token::KEYWORD(vec![self.ch]);
135            }
136            ']' => {
137                tok = Token::KEYWORD(vec![self.ch]);
138            }
139            '|' => {
140                tok = Token::KEYWORD(vec![self.ch]);
141            }
142            '-' => {
143                let next_ch = self.input[self.position + 1];
144                if is_letter(next_ch) || next_ch == '-' {
145                    let mut identifier = vec![self.ch];
146                    self.read_char();
147                    if self.input[self.position + 1] == '-' {
148                        self.read_char();
149                        identifier.append(&mut vec![self.ch]);
150                    }
151                    identifier.append(&mut read_identifier(self));
152                    return Token::KEYWORD(identifier);
153                }
154                tok = Token::CH(self.ch);
155            }
156            _ => {
157                return if is_letter(self.ch) {
158                    #[allow(unused_variables)]
159                    let start_position = self.position;
160                    #[allow(unused_mut)]
161                    let mut identifier: Vec<char> = read_identifier(self);
162                    if self.ch.is_numeric() {
163                        let position = self.position;
164                        while self.position < self.input.len() {
165                            if !self.ch.is_numeric() && !is_letter(self.ch) {
166                                break;
167                            }
168                            self.read_char();
169                        }
170                        identifier.append(&mut self.input[position..self.position].to_vec());
171                    }
172                    match get_keyword_token(&identifier) {
173                        Ok(keyword_token) => keyword_token,
174                        Err(_) => {
175                            if self.ch == '(' {
176                                return Token::ENTITY(identifier);
177                            } else if self.ch.is_whitespace() {
178                                let mut position = self.position;
179                                let mut ch = self.input[position];
180                                while position < self.input.len() && ch.is_whitespace() {
181                                    position += 1;
182                                    if position < self.input.len() {
183                                        ch = self.input[position];
184                                    }
185                                }
186                                if ch == '(' {
187                                    return Token::ENTITY(identifier);
188                                }
189                            }
190                            Token::IDENT(identifier)
191                        }
192                    }
193                } else if self.ch.is_numeric() {
194                    let identifier: Vec<char> = read_number(self);
195                    Token::INT(identifier)
196                } else if self.ch == '\'' {
197                    if self.position - 1 > 0 && self.input[self.position - 1] == '/' {
198                        tok = Token::CH(self.ch);
199                        self.read_char();
200                        return tok;
201                    }
202                    let str_value: Vec<char> = read_string(self, '\'');
203                    Token::STRING(str_value)
204                } else if self.ch == '"' {
205                    let str_value: Vec<char> = read_string(self, '"');
206                    Token::STRING(str_value)
207                } else {
208                    Token::ILLEGAL
209                }
210            }
211        }
212        self.read_char();
213        tok
214    }
215}
216
217pub fn get_keyword_token(identifier: &Vec<char>) -> Result<Token, String> {
218    let id: String = identifier.into_iter().collect();
219    match &id[..] {
220        "builtin" | "command" | "compgen" | "echo" | "eval" | "exit" | "false" | "hash"
221        | "kill" | "read" | "source" | "unset" | "test" | "true" | "printf" => {
222            Ok(Token::CONSTANT(identifier.clone()))
223        }
224        "alias" | "bg" | "bind" | "break" | "caller" | "case" | "cd" | "complete" | "compopt"
225        | "continue" | "coproc" | "declare" | "dirs" | "disown" | "enable" | "exec" | "export"
226        | "fc" | "fg" | "for" | "function" | "getopts" | "help" => {
227            Ok(Token::KEYWORD(identifier.clone()))
228        }
229        _ => Err(String::from("Not a keyword")),
230    }
231}
232
233pub fn render_html(input: Vec<char>) -> String {
234    let mut l = Lexer::new(input);
235    l.read_char();
236    let mut html = String::new();
237    let mut line = 1;
238    html.push_str("<table class=\"highlight-table\">\n");
239    html.push_str("<tbody>\n");
240    html.push_str("<tr>");
241    html.push_str(&format!(
242        "<td class=\"hl-num\" data-line=\"{}\"></td><td>",
243        line
244    ));
245
246    loop {
247        let token = l.next_token();
248        if token == Token::EOF {
249            html.push_str("</td></tr>\n");
250            break;
251        }
252
253        match token {
254            Token::INT(value) => {
255                html.push_str(&format!(
256                    "<span class=\"hl-c\">{}</span>",
257                    value.iter().collect::<String>()
258                ));
259            }
260            Token::IDENT(value) => {
261                html.push_str(&value.iter().collect::<String>());
262            }
263            Token::STRING(value) => {
264                let mut s = String::new();
265                for ch in value {
266                    if ch == '<' {
267                        s.push_str("&lt;");
268                    } else if ch == '>' {
269                        s.push_str("&gt;");
270                    } else {
271                        s.push(ch);
272                    }
273                }
274                s = s.replace("&lt;&lt;", "<span class=\"hl-k\">&lt;&lt;</span>");
275                s = s.replace("EOF", "<span class=\"hl-k\">EOF</span>");
276                let split = s.split("\n");
277                let split_len = split.clone().collect::<Vec<&str>>().len();
278                let mut index = 0;
279                for val in split {
280                    html.push_str(&format!("<span class=\"hl-s\">{}</span>", val));
281                    index = index + 1;
282                    if index != split_len {
283                        line = line + 1;
284                        html.push_str("</td></tr>\n");
285                        html.push_str(&format!(
286                            "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
287                            line
288                        ));
289                    }
290                }
291            }
292            Token::CH(value) => {
293                html.push(value);
294            }
295            Token::ENTITY(value) => {
296                html.push_str(&format!(
297                    "<span class=\"hl-en\">{}</span>",
298                    value.iter().collect::<String>()
299                ));
300            }
301            Token::CONSTANT(value) => {
302                html.push_str(&format!(
303                    "<span class=\"hl-c\">{}</span>",
304                    value.iter().collect::<String>()
305                ));
306            }
307            Token::KEYWORD(value) => {
308                html.push_str(&format!(
309                    "<span class=\"hl-k\">{}</span>",
310                    value.iter().collect::<String>()
311                ));
312            }
313            Token::COMMENT(value) => {
314                let mut lines = String::new();
315                for ch in value {
316                    if ch == '<' {
317                        lines.push_str("&lt;");
318                    } else if ch == '>' {
319                        lines.push_str("&gt;");
320                    } else {
321                        lines.push(ch);
322                    }
323                }
324                let split = lines.split("\n");
325                let split_len = split.clone().collect::<Vec<&str>>().len();
326                let mut index = 0;
327                for val in split {
328                    if val.len() > 1 {
329                        html.push_str(&format!("<span class=\"hl-cmt\">{}</span>", val));
330                    }
331                    index = index + 1;
332                    if index != split_len {
333                        line = line + 1;
334                        html.push_str("</td></tr>\n");
335                        html.push_str(&format!(
336                            "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
337                            line
338                        ));
339                    }
340                }
341            }
342            Token::ENDL(_) => {
343                line = line + 1;
344                html.push_str("</td></tr>\n");
345                html.push_str(&format!(
346                    "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
347                    line
348                ));
349            }
350            _ => {
351                html.push(l.ch);
352                l.read_char();
353            }
354        }
355    }
356
357    html.push_str("</tbody>\n");
358    html.push_str("</table>");
359    html
360}