use crate::{kind::CssSyntaxKind, language::CssLanguage};
use oak_core::{IncrementalCache, Lexer, LexerState, SourceText, lexer::LexOutput, source::Source};

type State<'input> = LexerState<&'input SourceText, CssLanguage>;

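/// A hand-written lexer that tokenizes CSS source text for `CssLanguage`.
///
/// A minimal usage sketch (assumes `CssLanguage` implements `Default` and that
/// the input value implements `Source`; adjust to the real APIs):
///
/// ```ignore
/// let lexer = CssLexer::new(CssLanguage::default());
/// let output = lexer.lex(source); // `source` must implement `Source`
/// ```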
pub struct CssLexer;

impl CssLexer {
    pub fn new(_config: CssLanguage) -> Self {
        Self
    }

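    /// Consumes a run of spaces and tabs and emits a single `Whitespace` token.
    /// Returns `true` if any whitespace was consumed.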
    fn skip_whitespace(&self, state: &mut State<'_>) -> bool {
        let start_pos = state.get_position();

        while let Some(ch) = state.peek() {
            if ch == ' ' || ch == '\t' {
                state.advance(ch.len_utf8());
            } else {
                break;
            }
        }

        if state.get_position() > start_pos {
            state.add_token(CssSyntaxKind::Whitespace, start_pos, state.get_position());
            true
        } else {
            false
        }
    }

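    /// Lexes a line break (`\n`, `\r`, or `\r\n`) into a single `Newline` token.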
    fn lex_newline(&self, state: &mut State<'_>) -> bool {
        let start_pos = state.get_position();

        if let Some('\n') = state.peek() {
            state.advance(1);
            state.add_token(CssSyntaxKind::Newline, start_pos, state.get_position());
            true
        } else if let Some('\r') = state.peek() {
            state.advance(1);
            if let Some('\n') = state.peek() {
                state.advance(1);
            }
            state.add_token(CssSyntaxKind::Newline, start_pos, state.get_position());
            true
        } else {
            false
        }
    }

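    /// Lexes a `/* ... */` block comment into a `Comment` token. An unterminated
    /// comment is consumed through the end of the input.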
    fn lex_comment(&self, state: &mut State<'_>) -> bool {
        let start_pos = state.get_position();

        if let Some('/') = state.peek() {
            if let Some('*') = state.peek_next_n(1) {
                state.advance(2);
                while let Some(ch) = state.peek() {
                    if ch == '*' && state.peek_next_n(1) == Some('/') {
                        state.advance(2);
                        break;
                    }
                    state.advance(ch.len_utf8());
                }

                state.add_token(CssSyntaxKind::Comment, start_pos, state.get_position());
                true
            } else {
                false
            }
        } else {
            false
        }
    }

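    /// Lexes a single- or double-quoted string, honoring backslash escapes, into
    /// a `StringLiteral` token.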
    fn lex_string(&self, state: &mut State<'_>) -> bool {
        let start_pos = state.get_position();

        if let Some(quote) = state.peek() {
            if quote == '"' || quote == '\'' {
                state.advance(1);
                while let Some(ch) = state.peek() {
                    if ch == quote {
                        state.advance(1);
                        break;
                    } else if ch == '\\' {
                        state.advance(1);
                        if let Some(escaped) = state.peek() {
                            state.advance(escaped.len_utf8());
                        }
                    } else {
                        state.advance(ch.len_utf8());
                    }
                }

                state.add_token(CssSyntaxKind::StringLiteral, start_pos, state.get_position());
                true
            } else {
                false
            }
        } else {
            false
        }
    }

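    /// Lexes a numeric literal: integer and fractional digits, an optional
    /// exponent (`e`/`E` with optional sign), and a trailing unit or `%`,
    /// all emitted as one `NumberLiteral` token.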
    fn lex_number(&self, state: &mut State<'_>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            if ch.is_ascii_digit() || (ch == '.' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit())) {
                // Integer part.
                while let Some(ch) = state.peek() {
                    if ch.is_ascii_digit() {
                        state.advance(1);
                    } else {
                        break;
                    }
                }

                // Fractional part.
                if let Some('.') = state.peek() {
                    state.advance(1);
                    while let Some(ch) = state.peek() {
                        if ch.is_ascii_digit() {
                            state.advance(1);
                        } else {
                            break;
                        }
                    }
                }

                // Exponent, e.g. `1e5` or `2.5E-3`.
                if let Some(ch) = state.peek() {
                    if ch == 'e' || ch == 'E' {
                        state.advance(1);
                        if let Some(sign) = state.peek() {
                            if sign == '+' || sign == '-' {
                                state.advance(1);
                            }
                        }
                        while let Some(ch) = state.peek() {
                            if ch.is_ascii_digit() {
                                state.advance(1);
                            } else {
                                break;
                            }
                        }
                    }
                }

                // Unit suffix, e.g. `px`, `em`, or `%`.
                while let Some(ch) = state.peek() {
                    if ch.is_alphabetic() || ch == '%' {
                        state.advance(ch.len_utf8());
                    } else {
                        break;
                    }
                }

                state.add_token(CssSyntaxKind::NumberLiteral, start_pos, state.get_position());
                true
            } else {
                false
            }
        } else {
            false
        }
    }

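    /// Lexes `#` followed by hex digits. Emits `ColorLiteral` when the digit
    /// count is a valid color length (3, 4, 6, or 8), otherwise `Hash`.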
    fn lex_color(&self, state: &mut State<'_>) -> bool {
        let start_pos = state.get_position();

        if let Some('#') = state.peek() {
            state.advance(1);
            let mut hex_count = 0;
            while let Some(ch) = state.peek() {
                if ch.is_ascii_hexdigit() && hex_count < 8 {
                    state.advance(1);
                    hex_count += 1;
                } else {
                    break;
                }
            }

            let token_kind = if matches!(hex_count, 3 | 4 | 6 | 8) {
                CssSyntaxKind::ColorLiteral
            } else {
                CssSyntaxKind::Hash
            };

            state.add_token(token_kind, start_pos, state.get_position());
            true
        } else {
            false
        }
    }

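    /// Lexes a `url(...)` function, with either a quoted or an unquoted argument,
    /// into a `UrlLiteral` token.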
    fn lex_url(&self, state: &mut State<'_>) -> bool {
        let start_pos = state.get_position();

        if let Some('u') = state.peek() {
            if state.peek_next_n(1) == Some('r') && state.peek_next_n(2) == Some('l') && state.peek_next_n(3) == Some('(') {
                state.advance(4);
                // Skip whitespace after `url(`.
                while let Some(ch) = state.peek() {
                    if ch.is_whitespace() {
                        state.advance(ch.len_utf8());
                    } else {
                        break;
                    }
                }

                if let Some(quote) = state.peek() {
                    if quote == '"' || quote == '\'' {
                        // Quoted argument: reuse the string lexer.
                        self.lex_string(state);
                    } else {
                        // Unquoted argument: consume up to whitespace or `)`.
                        while let Some(ch) = state.peek() {
                            if ch == ')' || ch.is_whitespace() {
                                break;
                            }
                            state.advance(ch.len_utf8());
                        }
                    }
                }

                // Skip trailing whitespace and consume the closing `)`.
                while let Some(ch) = state.peek() {
                    if ch.is_whitespace() {
                        state.advance(ch.len_utf8());
                    } else {
                        break;
                    }
                }

                if let Some(')') = state.peek() {
                    state.advance(1);
                }

                state.add_token(CssSyntaxKind::UrlLiteral, start_pos, state.get_position());
                true
            } else {
                false
            }
        } else {
            false
        }
    }

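    /// Lexes an identifier: a letter, `_`, or `-` followed by alphanumerics,
    /// `_`, or `-`, emitted as an `Identifier` token.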
    fn lex_identifier(&self, state: &mut State<'_>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            if ch.is_alphabetic() || ch == '_' || ch == '-' {
                while let Some(ch) = state.peek() {
                    if ch.is_alphanumeric() || ch == '_' || ch == '-' {
                        state.advance(ch.len_utf8());
                    } else {
                        break;
                    }
                }

                state.add_token(CssSyntaxKind::Identifier, start_pos, state.get_position());
                true
            } else {
                false
            }
        } else {
            false
        }
    }

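    /// Lexes an `@`-rule name. Well-known rules (`@import`, `@media`, ...) get
    /// dedicated kinds; anything else falls back to `AtRule`.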
    fn lex_at_rule(&self, state: &mut State<'_>, source: &SourceText) -> bool {
        let start_pos = state.get_position();

        if let Some('@') = state.peek() {
            state.advance(1);
            let rule_start = state.get_position();
            while let Some(ch) = state.peek() {
                if ch.is_alphabetic() || ch == '-' {
                    state.advance(ch.len_utf8());
                } else {
                    break;
                }
            }

            let rule_name = source.get_text_in((rule_start..state.get_position()).into());
            let token_kind = match rule_name {
                "import" => CssSyntaxKind::AtImport,
                "media" => CssSyntaxKind::AtMedia,
                "keyframes" => CssSyntaxKind::AtKeyframes,
                "font-face" => CssSyntaxKind::AtFontFace,
                "charset" => CssSyntaxKind::AtCharset,
                "namespace" => CssSyntaxKind::AtNamespace,
                "supports" => CssSyntaxKind::AtSupports,
                "page" => CssSyntaxKind::AtPage,
                "document" => CssSyntaxKind::AtDocument,
                _ => CssSyntaxKind::AtRule,
            };

            state.add_token(token_kind, start_pos, state.get_position());
            true
        } else {
            false
        }
    }

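    /// Lexes a single-character operator or punctuation token (separators,
    /// combinators, and attribute-selector operators).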
    fn lex_operator(&self, state: &mut State<'_>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            let token_kind = match ch {
                ':' => CssSyntaxKind::Colon,
                ';' => CssSyntaxKind::Semicolon,
                ',' => CssSyntaxKind::Comma,
                '.' => CssSyntaxKind::Dot,
                '#' => CssSyntaxKind::Hash,
                '+' => CssSyntaxKind::Plus,
                '-' => CssSyntaxKind::Minus,
                '*' => CssSyntaxKind::Star,
                '/' => CssSyntaxKind::Slash,
                '=' => CssSyntaxKind::Equals,
                '~' => CssSyntaxKind::Tilde,
                '|' => CssSyntaxKind::Pipe,
                '^' => CssSyntaxKind::Caret,
                '$' => CssSyntaxKind::Dollar,
                '>' => CssSyntaxKind::GreaterThan,
                _ => return false,
            };

            state.advance(1);
            state.add_token(token_kind, start_pos, state.get_position());
            true
        } else {
            false
        }
    }

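    /// Lexes a bracketing delimiter: parentheses, braces, or square brackets.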
    fn lex_delimiter(&self, state: &mut State<'_>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            let token_kind = match ch {
                '(' => CssSyntaxKind::LeftParen,
                ')' => CssSyntaxKind::RightParen,
                '{' => CssSyntaxKind::LeftBrace,
                '}' => CssSyntaxKind::RightBrace,
                '[' => CssSyntaxKind::LeftBracket,
                ']' => CssSyntaxKind::RightBracket,
                _ => return false,
            };

            state.advance(1);
            state.add_token(token_kind, start_pos, state.get_position());
            true
        } else {
            false
        }
    }
}

impl Lexer<CssLanguage> for CssLexer {
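    /// Lexes the whole source from scratch: each token rule is tried in priority
    /// order, unrecognized characters become `Error` tokens, and the stream ends
    /// with an `Eof` token.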
    fn lex(&self, source: impl Source) -> LexOutput<CssLanguage> {
        let source_text = SourceText::new(source.get_text_in((0..source.length()).into()));
        let mut state = LexerState::new(&source_text);

        while state.not_at_end() {
            if self.skip_whitespace(&mut state) {
                continue;
            }

            if self.lex_newline(&mut state) {
                continue;
            }

            if self.lex_comment(&mut state) {
                continue;
            }

            if self.lex_string(&mut state) {
                continue;
            }

            if self.lex_url(&mut state) {
                continue;
            }

            if self.lex_color(&mut state) {
                continue;
            }

            if self.lex_number(&mut state) {
                continue;
            }

            if self.lex_at_rule(&mut state, &source_text) {
                continue;
            }

            if self.lex_identifier(&mut state) {
                continue;
            }

            if self.lex_delimiter(&mut state) {
                continue;
            }

            if self.lex_operator(&mut state) {
                continue;
            }

            // No rule matched: consume one character and emit an Error token.
            let start_pos = state.get_position();
            if let Some(ch) = state.peek() {
                state.advance(ch.len_utf8());
                state.add_token(CssSyntaxKind::Error, start_pos, state.get_position());
            }
        }

        let eof_pos = state.get_position();
        state.add_token(CssSyntaxKind::Eof, eof_pos, eof_pos);

        state.finish(Ok(()))
    }

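    /// Re-lexes after an edit using the same token rules as `lex`, but seeds the
    /// lexer state with the change position and the previous incremental cache.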
    fn lex_incremental(
        &self,
        source: impl Source,
        changed: usize,
        cache: IncrementalCache<CssLanguage>,
    ) -> LexOutput<CssLanguage> {
        let source_text = SourceText::new(source.get_text_in((0..source.length()).into()));
        let mut state = LexerState::new_with_cache(&source_text, changed, cache);

        while state.not_at_end() {
            if self.skip_whitespace(&mut state) {
                continue;
            }

            if self.lex_newline(&mut state) {
                continue;
            }

            if self.lex_comment(&mut state) {
                continue;
            }

            if self.lex_string(&mut state) {
                continue;
            }

            if self.lex_url(&mut state) {
                continue;
            }

            if self.lex_color(&mut state) {
                continue;
            }

            if self.lex_number(&mut state) {
                continue;
            }

            if self.lex_at_rule(&mut state, &source_text) {
                continue;
            }

            if self.lex_identifier(&mut state) {
                continue;
            }

            if self.lex_delimiter(&mut state) {
                continue;
            }

            if self.lex_operator(&mut state) {
                continue;
            }

            // No rule matched: consume one character and emit an Error token.
            let start_pos = state.get_position();
            if let Some(ch) = state.peek() {
                state.advance(ch.len_utf8());
                state.add_token(CssSyntaxKind::Error, start_pos, state.get_position());
            }
        }

        let eof_pos = state.get_position();
        state.add_token(CssSyntaxKind::Eof, eof_pos, eof_pos);

        state.finish(Ok(()))
    }
}