// hl_core/lexers/clojure.rs
use crate::lexers::Token;

/// Cursor state for scanning Clojure source text one character at a time.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// Source text being scanned, one element per `char`.
    input: Vec<char>,
    /// Index of the character currently held in `ch`.
    pub position: usize,
    /// Index of the character the next `read_char` call will load.
    pub read_position: usize,
    /// Character under the cursor; `'\0'` before the first read and once the
    /// cursor has moved past the end of `input`.
    pub ch: char,
}

/// Reports whether `ch` counts as an identifier letter: any Unicode
/// alphabetic character, or an underscore.
fn is_letter(ch: char) -> bool {
    ch == '_' || ch.is_alphabetic()
}

15impl Lexer {
16 pub fn new(input: Vec<char>) -> Self {
17 Self {
18 input,
19 position: 0,
20 read_position: 0,
21 ch: '\0',
22 }
23 }
24
25 pub fn read_char(&mut self) {
26 if self.read_position >= self.input.len() {
27 self.ch = '\0';
28 } else {
29 self.ch = self.input[self.read_position];
30 }
31 self.position = self.read_position;
32 self.read_position += 1;
33 }
34
35 pub fn next_token(&mut self) -> Token {
36 let read_identifier = |l: &mut Lexer| -> Vec<char> {
37 let position = l.position;
38 while l.position < l.input.len() && is_letter(l.ch) {
39 l.read_char();
40 if l.ch == '.' {
41 l.read_char();
42 }
43 if l.ch == '/' {
44 l.read_char();
45 }
46 if l.ch == '-' {
47 l.read_char();
48 }
49 if l.ch == '!' {
50 l.read_char();
51 }
52 if l.ch == '?' {
53 l.read_char();
54 }
55 }
56 l.input[position..l.position].to_vec()
57 };
58
59 let read_string = |l: &mut Lexer, ch: char| -> Vec<char> {
60 let position = l.position;
61 l.read_char();
62 while l.position < l.input.len() && l.ch != ch {
63 if l.ch == '\\' {
64 l.read_char();
65 }
66 l.read_char();
67 }
68 l.read_char();
69 if l.position > l.input.len() {
70 l.position -= 1;
71 l.read_position -= 1;
72 }
73 l.input[position..l.position].to_vec()
74 };
75
76 let read_number = |l: &mut Lexer| -> Vec<char> {
77 let position = l.position;
78 while l.position < l.input.len() && l.ch.is_numeric() {
79 l.read_char();
80 }
81 l.input[position..l.position].to_vec()
82 };
83
84 let tok: Token;
85 if self.read_position < self.input.len()
86 && self.ch == ';'
87 && self.input[self.read_position] == ';'
88 {
89 return Token::COMMENT(read_string(self, '\n'));
90 }
91
92 match self.ch {
93 '\n' => {
94 tok = Token::ENDL(self.ch);
95 }
96 '\0' => {
97 tok = Token::EOF;
98 }
99 _ => {
100 return if is_letter(self.ch) {
101 #[allow(unused_variables)]
102 let start_position = self.position;
103 #[allow(unused_mut)]
104 let mut identifier: Vec<char> = read_identifier(self);
105 if self.ch.is_numeric() {
106 let position = self.position;
107 while self.position < self.input.len() {
108 if !self.ch.is_numeric() && !is_letter(self.ch) {
109 break;
110 }
111 self.read_char();
112 }
113 identifier.append(&mut self.input[position..self.position].to_vec());
114 }
115 match get_keyword_token(&identifier) {
116 Ok(keyword_token) => keyword_token,
117 Err(_) => {
118 if start_position > 0 && self.input[start_position - 1] == ':' {
119 return Token::ENTITY(identifier);
120 }
121 if start_position > 0 && self.input[start_position - 1] == '(' {
122 return Token::ENTITY(identifier);
123 }
124 Token::IDENT(identifier)
125 }
126 }
127 } else if self.ch.is_numeric() {
128 let identifier: Vec<char> = read_number(self);
129 Token::INT(identifier)
130 } else if self.ch == '"' {
131 let str_value: Vec<char> = read_string(self, '"');
132 Token::STRING(str_value)
133 } else {
134 Token::ILLEGAL
135 }
136 }
137 }
138 self.read_char();
139 tok
140 }
141}
142
143pub fn get_keyword_token(identifier: &Vec<char>) -> Result<Token, String> {
144 let id: String = identifier.into_iter().collect();
145 match &id[..] {
146 "nil" | "full" => Ok(Token::CONSTANT(identifier.clone())),
147 "binding" | "case" | "catch" | "cond" | "do" | "ns" | "def" | "defonce" | "defmulti"
148 | "defmethod" | "defn" | "if" | "fn" | "require" | "when" | "try" | "throw" | "for"
149 | "let" | "defn-" | "in-ns" | "if-let" | "s/defn" | "if-not" | "when-not" => {
150 Ok(Token::KEYWORD(identifier.clone()))
151 }
152 _ => Err(String::from("Not a keyword")),
153 }
154}
155
156pub fn render_html(input: Vec<char>) -> String {
157 let mut l = Lexer::new(input);
158 l.read_char();
159 let mut html = String::new();
160 let mut line = 1;
161 html.push_str("<table class=\"highlight-table\">\n");
162 html.push_str("<tbody>\n");
163 html.push_str("<tr>");
164 html.push_str(&format!(
165 "<td class=\"hl-num\" data-line=\"{}\"></td><td>",
166 line
167 ));
168
169 loop {
170 let token = l.next_token();
171 if token == Token::EOF {
172 html.push_str("</td></tr>\n");
173 break;
174 }
175
176 match token {
177 Token::INT(value) => {
178 html.push_str(&format!(
179 "<span class=\"hl-c\">{}</span>",
180 value.iter().collect::<String>()
181 ));
182 }
183 Token::IDENT(value) => {
184 html.push_str(&value.iter().collect::<String>());
185 }
186 Token::STRING(value) => {
187 let mut s = String::new();
188 for ch in value {
189 if ch == '<' {
190 s.push_str("<");
191 } else if ch == '>' {
192 s.push_str(">");
193 } else {
194 s.push(ch);
195 }
196 }
197 html.push_str(&format!("<span class=\"hl-s\">{}</span>", s));
198 }
199 Token::ENTITY(value) => {
200 html.push_str(&format!(
201 "<span class=\"hl-en\">{}</span>",
202 value.iter().collect::<String>()
203 ));
204 }
205 Token::CONSTANT(value) => {
206 html.push_str(&format!(
207 "<span class=\"hl-c\">{}</span>",
208 value.iter().collect::<String>()
209 ));
210 }
211 Token::KEYWORD(value) => {
212 html.push_str(&format!(
213 "<span class=\"hl-k\">{}</span>",
214 value.iter().collect::<String>()
215 ));
216 }
217 Token::COMMENT(value) => {
218 let mut lines = String::new();
219 for ch in value {
220 if ch == '<' {
221 lines.push_str("<");
222 } else if ch == '>' {
223 lines.push_str(">");
224 } else {
225 lines.push(ch);
226 }
227 }
228 let split = lines.split("\n");
229 let split_len = split.clone().collect::<Vec<&str>>().len();
230 let mut index = 0;
231 for val in split {
232 if val.len() > 1 {
233 html.push_str(&format!("<span class=\"hl-cmt\">{}</span>", val));
234 }
235 index = index + 1;
236 if index != split_len {
237 line = line + 1;
238 html.push_str("</td></tr>\n");
239 html.push_str(&format!(
240 "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
241 line
242 ));
243 }
244 }
245 }
246 Token::ENDL(_) => {
247 line = line + 1;
248 html.push_str("</td></tr>\n");
249 html.push_str(&format!(
250 "<tr><td class=\"hl-num\" data-line=\"{}\"></td><td>",
251 line
252 ));
253 }
254 _ => {
255 html.push(l.ch);
256 l.read_char();
257 }
258 }
259 }
260
261 html.push_str("</tbody>\n");
262 html.push_str("</table>");
263 html
264}