1pub mod token_type;
2use crate::language::CssLanguage;
3use oak_core::{Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
4pub use token_type::CssTokenType;
5
/// Shorthand for the oak_core lexer state specialized to the CSS language.
type State<'s, S> = LexerState<'s, S, CssLanguage>;
7
/// Tokenizer for CSS source text.
pub struct CssLexer<'config> {
    // Language configuration, held by reference; currently unused by any
    // lexing rule (hence the leading underscore) but kept for future use.
    _config: &'config CssLanguage,
}
11
impl<'config> CssLexer<'config> {
    /// Creates a lexer borrowing the given language configuration.
    pub fn new(config: &'config CssLanguage) -> Self {
        Self { _config: config }
    }
16
17 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
19 let start_pos = state.get_position();
20
21 while let Some(ch) = state.peek() {
22 if ch == ' ' || ch == '\t' {
23 state.advance(ch.len_utf8());
24 }
25 else {
26 break;
27 }
28 }
29
30 if state.get_position() > start_pos {
31 state.add_token(CssTokenType::Whitespace, start_pos, state.get_position());
32 true
33 }
34 else {
35 false
36 }
37 }
38
39 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
41 let start_pos = state.get_position();
42
43 if let Some('\n') = state.peek() {
44 state.advance(1);
45 state.add_token(CssTokenType::Newline, start_pos, state.get_position());
46 true
47 }
48 else if let Some('\r') = state.peek() {
49 state.advance(1);
50 if let Some('\n') = state.peek() {
51 state.advance(1);
52 }
53 state.add_token(CssTokenType::Newline, start_pos, state.get_position());
54 true
55 }
56 else {
57 false
58 }
59 }
60
61 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
63 let start_pos = state.get_position();
64
65 if let Some('/') = state.peek() {
66 if let Some('*') = state.peek_next_n(1) {
67 state.advance(2); while let Some(ch) = state.peek() {
70 if ch == '*' && state.peek_next_n(1) == Some('/') {
71 state.advance(2); break;
73 }
74 state.advance(ch.len_utf8());
75 }
76
77 state.add_token(CssTokenType::Comment, start_pos, state.get_position());
78 true
79 }
80 else {
81 false
82 }
83 }
84 else {
85 false
86 }
87 }
88
89 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
91 let start_pos = state.get_position();
92
93 if let Some(quote) = state.peek() {
94 if quote == '"' || quote == '\'' {
95 state.advance(1); while let Some(ch) = state.peek() {
98 if ch == quote {
99 state.advance(1); break;
101 }
102 else if ch == '\\' {
103 state.advance(1); if state.peek().is_some() {
105 state.advance(1);
106 }
107 }
108 else {
109 state.advance(ch.len_utf8());
110 }
111 }
112
113 state.add_token(CssTokenType::StringLiteral, start_pos, state.get_position());
114 true
115 }
116 else {
117 false
118 }
119 }
120 else {
121 false
122 }
123 }
124
125 fn lex_url<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
127 let start_pos = state.get_position();
128
129 if let Some('u') = state.peek() {
130 if state.peek_next_n(1) == Some('r') && state.peek_next_n(2) == Some('l') && state.peek_next_n(3) == Some('(') {
131 state.advance(4); while let Some(ch) = state.peek() {
135 if ch.is_whitespace() {
136 state.advance(ch.len_utf8());
137 }
138 else {
139 break;
140 }
141 }
142
143 if let Some(quote) = state.peek() {
145 if quote == '"' || quote == '\'' {
146 state.advance(1);
147 while let Some(ch) = state.peek() {
148 if ch == quote {
149 state.advance(1);
150 break;
151 }
152 else if ch == '\\' {
153 state.advance(1);
154 if state.peek().is_some() {
155 state.advance(1);
156 }
157 }
158 else {
159 state.advance(ch.len_utf8());
160 }
161 }
162 }
163 else {
164 while let Some(ch) = state.peek() {
165 if ch == ')' || ch.is_whitespace() {
166 break;
167 }
168 state.advance(ch.len_utf8());
169 }
170 }
171 }
172
173 while let Some(ch) = state.peek() {
175 if ch.is_whitespace() {
176 state.advance(ch.len_utf8());
177 }
178 else {
179 break;
180 }
181 }
182
183 if let Some(')') = state.peek() {
185 state.advance(1);
186 }
187
188 state.add_token(CssTokenType::UrlLiteral, start_pos, state.get_position());
189 true
190 }
191 else {
192 false
193 }
194 }
195 else {
196 false
197 }
198 }
199
200 fn lex_color<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
202 let start_pos = state.get_position();
203
204 if let Some('#') = state.peek() {
205 state.advance(1); let mut count = 0;
208 while let Some(ch) = state.peek() {
209 if ch.is_ascii_hexdigit() {
210 state.advance(1);
211 count += 1;
212 }
213 else {
214 break;
215 }
216 }
217
218 if count == 3 || count == 4 || count == 6 || count == 8 {
219 state.add_token(CssTokenType::ColorLiteral, start_pos, state.get_position());
220 true
221 }
222 else {
223 state.add_token(CssTokenType::Hash, start_pos, start_pos + 1);
226 state.set_position(start_pos + 1);
227 true
228 }
229 }
230 else {
231 false
232 }
233 }
234
235 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
237 let start_pos = state.get_position();
238
239 let mut has_digits = false;
240 if let Some(ch) = state.peek() {
241 if ch == '+' || ch == '-' {
242 state.advance(1);
243 }
244 }
245
246 while let Some(ch) = state.peek() {
247 if ch.is_ascii_digit() {
248 state.advance(1);
249 has_digits = true;
250 }
251 else {
252 break;
253 }
254 }
255
256 if let Some('.') = state.peek() {
257 if let Some(next_ch) = state.peek_next_n(1) {
258 if next_ch.is_ascii_digit() {
259 state.advance(1); while let Some(ch) = state.peek() {
261 if ch.is_ascii_digit() {
262 state.advance(1);
263 has_digits = true;
264 }
265 else {
266 break;
267 }
268 }
269 }
270 }
271 }
272
273 if has_digits {
274 let unit_start = state.get_position();
276 while let Some(ch) = state.peek() {
277 if ch.is_alphabetic() || ch == '%' {
278 state.advance(ch.len_utf8());
279 }
280 else {
281 break;
282 }
283 }
284
285 if state.get_position() > unit_start {
286 state.add_token(CssTokenType::NumberLiteral, start_pos, state.get_position());
288 }
289 else {
290 state.add_token(CssTokenType::NumberLiteral, start_pos, state.get_position());
291 }
292 true
293 }
294 else {
295 state.set_position(start_pos);
296 false
297 }
298 }
299
300 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
302 let start_pos = state.get_position();
303
304 if let Some(ch) = state.peek() {
305 if ch.is_alphabetic() || ch == '_' || ch == '-' {
306 while let Some(ch) = state.peek() {
307 if ch.is_alphanumeric() || ch == '_' || ch == '-' {
308 state.advance(ch.len_utf8());
309 }
310 else {
311 break;
312 }
313 }
314
315 state.add_token(CssTokenType::Identifier, start_pos, state.get_position());
316 true
317 }
318 else {
319 false
320 }
321 }
322 else {
323 false
324 }
325 }
326
327 fn lex_at_rule<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
329 let start_pos = state.get_position();
330
331 if let Some('@') = state.peek() {
332 state.advance(1); let rule_start = state.get_position();
335 while let Some(ch) = state.peek() {
336 if ch.is_alphabetic() || ch == '-' {
337 state.advance(ch.len_utf8());
338 }
339 else {
340 break;
341 }
342 }
343
344 let rule_name = state.get_text_in((rule_start..state.get_position()).into());
345 let token_kind = match rule_name.as_ref() {
346 "import" => CssTokenType::AtImport,
347 "media" => CssTokenType::AtMedia,
348 "keyframes" => CssTokenType::AtKeyframes,
349 "font-face" => CssTokenType::AtFontFace,
350 "charset" => CssTokenType::AtCharset,
351 "namespace" => CssTokenType::AtNamespace,
352 "supports" => CssTokenType::AtSupports,
353 "page" => CssTokenType::AtPage,
354 "document" => CssTokenType::AtDocument,
355 _ => CssTokenType::AtRule,
356 };
357
358 state.add_token(token_kind, start_pos, state.get_position());
359 true
360 }
361 else {
362 false
363 }
364 }
365
366 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
368 let start_pos = state.get_position();
369
370 if let Some(ch) = state.peek() {
371 let token_kind = match ch {
372 '(' => CssTokenType::LeftParen,
373 ')' => CssTokenType::RightParen,
374 '{' => CssTokenType::LeftBrace,
375 '}' => CssTokenType::RightBrace,
376 '[' => CssTokenType::LeftBracket,
377 ']' => CssTokenType::RightBracket,
378 ',' => CssTokenType::Comma,
379 ';' => CssTokenType::Semicolon,
380 _ => return false,
381 };
382
383 state.advance(1);
384 state.add_token(token_kind, start_pos, state.get_position());
385 true
386 }
387 else {
388 false
389 }
390 }
391
392 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
394 let start_pos = state.get_position();
395
396 if let Some(ch) = state.peek() {
397 let token_kind = match ch {
398 ':' => CssTokenType::Colon,
399 '.' => CssTokenType::Dot,
400 '#' => CssTokenType::Hash,
401 '+' => CssTokenType::Plus,
402 '-' => CssTokenType::Minus,
403 '*' => CssTokenType::Star,
404 '/' => CssTokenType::Slash,
405 '=' => CssTokenType::Equals,
406 '~' => CssTokenType::Tilde,
407 '|' => CssTokenType::Pipe,
408 '^' => CssTokenType::Caret,
409 '$' => CssTokenType::Dollar,
410 '>' => CssTokenType::GreaterThan,
411 _ => return false,
412 };
413
414 state.advance(1);
415 state.add_token(token_kind, start_pos, state.get_position());
416 true
417 }
418 else {
419 false
420 }
421 }
422
423 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
424 while state.not_at_end() {
425 let safe_point = state.get_position();
426
427 if self.skip_whitespace(state) {
429 continue;
430 }
431
432 if self.lex_newline(state) {
433 continue;
434 }
435
436 if self.lex_comment(state) {
437 continue;
438 }
439
440 if self.lex_string(state) {
441 continue;
442 }
443
444 if self.lex_url(state) {
445 continue;
446 }
447
448 if self.lex_color(state) {
449 continue;
450 }
451
452 if self.lex_number(state) {
453 continue;
454 }
455
456 if self.lex_at_rule(state) {
457 continue;
458 }
459
460 if self.lex_identifier(state) {
461 continue;
462 }
463
464 if self.lex_delimiter(state) {
465 continue;
466 }
467
468 if self.lex_operator(state) {
469 continue;
470 }
471
472 let start_pos = state.get_position();
474 if let Some(ch) = state.peek() {
475 state.advance(ch.len_utf8());
476 state.add_token(CssTokenType::Error, start_pos, state.get_position());
477 }
478 else {
479 break;
480 }
481
482 state.advance_if_dead_lock(safe_point);
483 }
484 Ok(())
485 }
486}
487
impl<'config> Lexer<CssLanguage> for CssLexer<'config> {
    /// Tokenizes `source` from scratch and returns the lex output.
    ///
    /// The `_edits` parameter is currently ignored — no incremental
    /// relexing is performed; every call lexes the full source.
    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], mut cache: &'a mut impl oak_core::lexer::LexerCache<CssLanguage>) -> LexOutput<CssLanguage> {
        let mut state = LexerState::new(source);
        let result = self.run(&mut state);
        // Only a successful run gets an EOF token; on error the failure is
        // carried through `result` into the output below.
        if result.is_ok() {
            state.add_eof();
        }
        state.finish_with_cache(result, &mut cache)
    }
}