1use crate::lexers::Token;
3
/// Cursor over a buffer of characters, advanced one character at a time by
/// `read_char` and consumed token by token through `next_token`.
pub struct Lexer {
    /// The complete text being tokenized, one `char` per element.
    input: Vec<char>,
    /// Index of the character currently stored in `ch`.
    pub position: usize,
    /// Index of the character the next `read_char` call will load.
    pub read_position: usize,
    /// Character under examination; `'\0'` before the first `read_char` call
    /// and again once reading has moved past the end of `input`.
    pub ch: char,
}
10
/// Reports whether `ch` counts as a letter for identifier scanning:
/// an underscore or any alphabetic character.
fn is_letter(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_alphabetic(),
    }
}
14
15impl Lexer {
16 pub fn new(input: Vec<char>) -> Self {
17 Self {
18 input,
19 position: 0,
20 read_position: 0,
21 ch: '\0',
22 }
23 }
24
25 pub fn read_char(&mut self) {
26 if self.read_position >= self.input.len() {
27 self.ch = '\0';
28 } else {
29 self.ch = self.input[self.read_position];
30 }
31 self.position = self.read_position;
32 self.read_position += 1;
33 }
34
35 pub fn next_token(&mut self) -> Token {
36 let read_identifier = |l: &mut Lexer| -> Vec<char> {
37 let position = l.position;
38 while l.position < l.input.len() && is_letter(l.ch) {
39 l.read_char();
40 if l.ch == '-' {
41 l.read_char();
42 }
43 }
44 l.input[position..l.position].to_vec()
45 };
46
47 let read_string = |l: &mut Lexer, ch: char| -> Vec<char> {
48 let position = l.position;
49 l.read_char();
50 while l.position < l.input.len() && l.ch != ch {
51 if l.ch == '\\' {
52 l.read_char();
53 }
54 l.read_char();
55 }
56 l.read_char();
57 if l.position > l.input.len() {
58 l.position -= 1;
59 l.read_position -= 1;
60 }
61 l.input[position..l.position].to_vec()
62 };
63
64 let read_number = |l: &mut Lexer| -> Vec<char> {
65 let position = l.position;
66 while l.position < l.input.len() && l.ch.is_numeric() {
67 l.read_char();
68 }
69 l.input[position..l.position].to_vec()
70 };
71
72 let tok: Token;
73 if self.ch == '$' {
74 let start_position = self.position;
75 while self.position < self.input.len()
76 && !is_letter(self.ch)
77 && !self.ch.is_numeric()
78 && self.ch != '\n'
79 && self.ch != ' '
80 {
81 self.read_char();
82 }
83 let identifier = self.input[start_position..self.position].to_vec();
84 return Token::IDENT(identifier);
85 }
86
87 if self.ch == '<' {
88 let next_ch = self.input[self.position + 1];
89 if self.position + 5 < self.input.len()
90 && next_ch == '<'
91 && self.input[self.position + 2] == 'E'
92 && self.input[self.position + 3] == 'O'
93 && self.input[self.position + 4] == 'F'
94 {
95 let mut comment = String::from("<<EOF").chars().collect::<Vec<_>>();
96 self.read_char();
97 self.read_char();
98 self.read_char();
99 self.read_char();
100 self.read_char();
101 let last_position = self.position;
102 while self.position < self.input.len() {
103 if self.ch == 'E' {
104 if self.input[self.position + 1] == 'O' {
105 if self.input[self.position + 2] == 'F' {
106 self.read_char();
107 self.read_char();
108 self.read_char();
109 break;
110 }
111 }
112 }
113 self.read_char();
114 }
115 comment.append(&mut self.input[last_position..self.position].to_vec());
116 return Token::STRING(comment);
117 }
118 }
119 if self.ch == '#' {
120 return Token::COMMENT(read_string(self, '\n'));
121 }
122
123 match self.ch {
124 '\n' => {
125 tok = Token::ENDL(self.ch);
126 }
127 '\0' => {
128 tok = Token::EOF;
129 }
130 '!' => {
131 tok = Token::KEYWORD(vec![self.ch]);
132 }
133 '[' => {
134 tok = Token::KEYWORD(vec![self.ch]);
135 }
136 ']' => {
137 tok = Token::KEYWORD(vec![self.ch]);
138 }
139 '|' => {
140 tok = Token::KEYWORD(vec![self.ch]);
141 }
142 '-' => {
143 let next_ch = self.input[self.position + 1];
144 if is_letter(next_ch) || next_ch == '-' {
145 let mut identifier = vec![self.ch];
146 self.read_char();
147 if self.input[self.position + 1] == '-' {
148 self.read_char();
149 identifier.append(&mut vec![self.ch]);
150 }
151 identifier.append(&mut read_identifier(self));
152 return Token::KEYWORD(identifier);
153 }
154 tok = Token::CH(self.ch);
155 }
156 _ => {
157 return if is_letter(self.ch) {
158 #[allow(unused_variables)]
159 let start_position = self.position;
160 #[allow(unused_mut)]
161 let mut identifier: Vec<char> = read_identifier(self);
162 if self.ch.is_numeric() {
163 let position = self.position;
164 while self.position < self.input.len() {
165 if !self.ch.is_numeric() && !is_letter(self.ch) {
166 break;
167 }
168 self.read_char();
169 }
170 identifier.append(&mut self.input[position..self.position].to_vec());
171 }
172 match get_keyword_token(&identifier) {
173 Ok(keyword_token) => keyword_token,
174 Err(_) => {
175 if self.ch == '(' {
176 return Token::ENTITY(identifier);
177 } else if self.ch.is_whitespace() {
178 let mut position = self.position;
179 let mut ch = self.input[position];
180 while position < self.input.len() && ch.is_whitespace() {
181 position += 1;
182 if position < self.input.len() {
183 ch = self.input[position];
184 }
185 }
186 if ch == '(' {
187 return Token::ENTITY(identifier);
188 }
189 }
190 Token::IDENT(identifier)
191 }
192 }
193 } else if self.ch.is_numeric() {
194 let identifier: Vec<char> = read_number(self);
195 Token::INT(identifier)
196 } else if self.ch == '\'' {
197 if self.position - 1 > 0 && self.input[self.position - 1] == '/' {
198 tok = Token::CH(self.ch);
199 self.read_char();
200 return tok;
201 }
202 let str_value: Vec<char> = read_string(self, '\'');
203 Token::STRING(str_value)
204 } else if self.ch == '"' {
205 let str_value: Vec<char> = read_string(self, '"');
206 Token::STRING(str_value)
207 } else {
208 Token::ILLEGAL
209 }
210 }
211 }
212 self.read_char();
213 tok
214 }
215}
216
217pub fn get_keyword_token(identifier: &Vec<char>) -> Result<Token, String> {
218 let id: String = identifier.into_iter().collect();
219 match &id[..] {
220 "builtin" | "command" | "compgen" | "echo" | "eval" | "exit" | "false" | "hash"
221 | "kill" | "read" | "source" | "unset" | "test" | "true" | "printf" => {
222 Ok(Token::CONSTANT(identifier.clone()))
223 }
224 "alias" | "bg" | "bind" | "break" | "caller" | "case" | "cd" | "complete" | "compopt"
225 | "continue" | "coproc" | "declare" | "dirs" | "disown" | "enable" | "exec" | "export"
226 | "fc" | "fg" | "for" | "function" | "getopts" | "help" => {
227 Ok(Token::KEYWORD(identifier.clone()))
228 }
229 _ => Err(String::from("Not a keyword")),
230 }
231}
232
233pub fn render_html(input: Vec<char>) -> String {
234 let mut l = Lexer::new(input);
235 l.read_char();
236 let mut html = String::new();
237 let mut line = 1;
238 html.push_str("<table class=\"highlight-table\">\n");
239 html.push_str("<tbody>\n");
240 html.push_str("<tr>");
241 html.push_str(&format!(
242 "<td class=\"hl-num\" data-line=\"{}\"></td><td>",
243 line
244 ));
245
246 loop {
247 let token = l.next_token();
248 if token == Token::EOF {
249 html.push_str("</td></tr>\n");
250 break;
251 }
252
253 match token {
254 Token::INT(value) => {
255 html.push_str(&format!(
256 "<span class=\"hl-c\">{}</span>",
257 value.iter().collect::<String>()
258 ));
259 }
260 Token::IDENT(value) => {
261 html.push_str(&value.iter().collect::<String>());
262 }
263 Token::STRING(value) => {
264 let mut s = String::new();
265 for ch in value {
266 if ch == '<' {
267 s.push_str("<");
268 } else if ch == '>' {
269 s.push_str(">");
270 } else {
271 s.push(ch);
272 }
273 }
274 s = s.replace("<<", "<span class=\"hl-k\"><<</span>");
275 s = s.replace("EOF", "<span class=\"hl-k\">EOF</span>");
276 let split = s.split("\n");
277 let split_len = split.clone().collect::<Vec<&str>>().len();
278 let mut index = 0;
279 for val in split {
280 html.push_str(&format!("<span class=\"hl-s\">{}</span>", val));
281 index = index + 1;
282 if index != split_len {
283 line = line + 1;
284 html.push_str("</td></tr>\n");
285 html.push_str(&format!(
286 "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
287 line
288 ));
289 }
290 }
291 }
292 Token::CH(value) => {
293 html.push(value);
294 }
295 Token::ENTITY(value) => {
296 html.push_str(&format!(
297 "<span class=\"hl-en\">{}</span>",
298 value.iter().collect::<String>()
299 ));
300 }
301 Token::CONSTANT(value) => {
302 html.push_str(&format!(
303 "<span class=\"hl-c\">{}</span>",
304 value.iter().collect::<String>()
305 ));
306 }
307 Token::KEYWORD(value) => {
308 html.push_str(&format!(
309 "<span class=\"hl-k\">{}</span>",
310 value.iter().collect::<String>()
311 ));
312 }
313 Token::COMMENT(value) => {
314 let mut lines = String::new();
315 for ch in value {
316 if ch == '<' {
317 lines.push_str("<");
318 } else if ch == '>' {
319 lines.push_str(">");
320 } else {
321 lines.push(ch);
322 }
323 }
324 let split = lines.split("\n");
325 let split_len = split.clone().collect::<Vec<&str>>().len();
326 let mut index = 0;
327 for val in split {
328 if val.len() > 1 {
329 html.push_str(&format!("<span class=\"hl-cmt\">{}</span>", val));
330 }
331 index = index + 1;
332 if index != split_len {
333 line = line + 1;
334 html.push_str("</td></tr>\n");
335 html.push_str(&format!(
336 "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
337 line
338 ));
339 }
340 }
341 }
342 Token::ENDL(_) => {
343 line = line + 1;
344 html.push_str("</td></tr>\n");
345 html.push_str(&format!(
346 "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
347 line
348 ));
349 }
350 _ => {
351 html.push(l.ch);
352 l.read_char();
353 }
354 }
355 }
356
357 html.push_str("</tbody>\n");
358 html.push_str("</table>");
359 html
360}