1#![doc = include_str!("readme.md")]
2pub mod token_type;
3use crate::language::CssLanguage;
4use oak_core::{Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
5pub use token_type::CssTokenType;
6
/// Shorthand for the generic lexer state specialised to the CSS language.
type State<'s, S> = LexerState<'s, S, CssLanguage>;
8
/// Hand-written lexer for CSS source text.
///
/// Borrows the language configuration; the field is currently unused (hence
/// the leading underscore) but ties the lexer's lifetime to the configuration
/// for future use.
pub struct CssLexer<'config> {
    _config: &'config CssLanguage,
}
14
15impl<'config> CssLexer<'config> {
16 pub fn new(config: &'config CssLanguage) -> Self {
18 Self { _config: config }
19 }
20
21 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
23 let start_pos = state.get_position();
24
25 while let Some(ch) = state.peek() {
26 if ch == ' ' || ch == '\t' { state.advance(ch.len_utf8()) } else { break }
27 }
28
29 if state.get_position() > start_pos {
30 state.add_token(CssTokenType::Whitespace, start_pos, state.get_position());
31 true
32 }
33 else {
34 false
35 }
36 }
37
38 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
40 let start_pos = state.get_position();
41
42 if let Some('\n') = state.peek() {
43 state.advance(1);
44 state.add_token(CssTokenType::Newline, start_pos, state.get_position());
45 true
46 }
47 else if let Some('\r') = state.peek() {
48 state.advance(1);
49 if let Some('\n') = state.peek() {
50 state.advance(1)
51 }
52 state.add_token(CssTokenType::Newline, start_pos, state.get_position());
53 true
54 }
55 else {
56 false
57 }
58 }
59
60 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
62 let start_pos = state.get_position();
63
64 if let Some('/') = state.peek() {
65 if let Some('*') = state.peek_next_n(1) {
66 state.advance(2); while let Some(ch) = state.peek() {
69 if ch == '*' && state.peek_next_n(1) == Some('/') {
70 state.advance(2); break;
72 }
73 state.advance(ch.len_utf8())
74 }
75
76 state.add_token(CssTokenType::Comment, start_pos, state.get_position());
77 true
78 }
79 else {
80 false
81 }
82 }
83 else {
84 false
85 }
86 }
87
88 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
90 let start_pos = state.get_position();
91
92 if let Some(quote) = state.peek() {
93 if quote == '"' || quote == '\'' {
94 state.advance(1); while let Some(ch) = state.peek() {
97 if ch == quote {
98 state.advance(1); break;
100 }
101 else if ch == '\\' {
102 state.advance(1); if state.peek().is_some() {
104 state.advance(1)
105 }
106 }
107 else {
108 state.advance(ch.len_utf8())
109 }
110 }
111
112 state.add_token(CssTokenType::StringLiteral, start_pos, state.get_position());
113 true
114 }
115 else {
116 false
117 }
118 }
119 else {
120 false
121 }
122 }
123
124 fn lex_url<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
126 let start_pos = state.get_position();
127
128 if let Some('u') = state.peek() {
129 if state.peek_next_n(1) == Some('r') && state.peek_next_n(2) == Some('l') && state.peek_next_n(3) == Some('(') {
130 state.advance(4); while let Some(ch) = state.peek() {
134 if ch.is_whitespace() { state.advance(ch.len_utf8()) } else { break }
135 }
136
137 if let Some(quote) = state.peek() {
139 if quote == '"' || quote == '\'' {
140 state.advance(1);
141 while let Some(ch) = state.peek() {
142 if ch == quote {
143 state.advance(1);
144 break;
145 }
146 else if ch == '\\' {
147 state.advance(1);
148 if state.peek().is_some() {
149 state.advance(1)
150 }
151 }
152 else {
153 state.advance(ch.len_utf8())
154 }
155 }
156 }
157 else {
158 while let Some(ch) = state.peek() {
159 if ch == ')' || ch.is_whitespace() {
160 break;
161 }
162 state.advance(ch.len_utf8())
163 }
164 }
165 }
166
167 while let Some(ch) = state.peek() {
169 if ch.is_whitespace() { state.advance(ch.len_utf8()) } else { break }
170 }
171
172 if let Some(')') = state.peek() {
174 state.advance(1)
175 }
176
177 state.add_token(CssTokenType::UrlLiteral, start_pos, state.get_position());
178 true
179 }
180 else {
181 false
182 }
183 }
184 else {
185 false
186 }
187 }
188
189 fn lex_color<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
191 let start_pos = state.get_position();
192
193 if let Some('#') = state.peek() {
194 state.advance(1); let mut count = 0;
197 while let Some(ch) = state.peek() {
198 if ch.is_ascii_hexdigit() {
199 state.advance(1);
200 count += 1
201 }
202 else {
203 break;
204 }
205 }
206
207 if count == 3 || count == 4 || count == 6 || count == 8 {
208 state.add_token(CssTokenType::ColorLiteral, start_pos, state.get_position());
209 true
210 }
211 else {
212 state.add_token(CssTokenType::Hash, start_pos, start_pos + 1);
215 state.set_position(start_pos + 1);
216 true
217 }
218 }
219 else {
220 false
221 }
222 }
223
224 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
226 let start_pos = state.get_position();
227
228 let mut has_digits = false;
229 if let Some(ch) = state.peek() {
230 if ch == '+' || ch == '-' {
231 state.advance(1)
232 }
233 }
234
235 while let Some(ch) = state.peek() {
236 if ch.is_ascii_digit() {
237 state.advance(1);
238 has_digits = true
239 }
240 else {
241 break;
242 }
243 }
244
245 if let Some('.') = state.peek() {
246 if let Some(next_ch) = state.peek_next_n(1) {
247 if next_ch.is_ascii_digit() {
248 state.advance(1); while let Some(ch) = state.peek() {
250 if ch.is_ascii_digit() {
251 state.advance(1);
252 has_digits = true
253 }
254 else {
255 break;
256 }
257 }
258 }
259 }
260 }
261
262 if has_digits {
263 let unit_start = state.get_position();
265 while let Some(ch) = state.peek() {
266 if ch.is_alphabetic() || ch == '%' { state.advance(ch.len_utf8()) } else { break }
267 }
268
269 if state.get_position() > unit_start {
270 state.add_token(CssTokenType::NumberLiteral, start_pos, state.get_position())
272 }
273 else {
274 state.add_token(CssTokenType::NumberLiteral, start_pos, state.get_position())
275 }
276 true
277 }
278 else {
279 state.set_position(start_pos);
280 false
281 }
282 }
283
284 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
286 let start_pos = state.get_position();
287
288 if let Some(ch) = state.peek() {
289 if ch.is_alphabetic() || ch == '_' || ch == '-' {
290 while let Some(ch) = state.peek() {
291 if ch.is_alphanumeric() || ch == '_' || ch == '-' { state.advance(ch.len_utf8()) } else { break }
292 }
293
294 state.add_token(CssTokenType::Identifier, start_pos, state.get_position());
295 true
296 }
297 else {
298 false
299 }
300 }
301 else {
302 false
303 }
304 }
305
306 fn lex_at_rule<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
308 let start_pos = state.get_position();
309
310 if let Some('@') = state.peek() {
311 state.advance(1); let rule_start = state.get_position();
314 while let Some(ch) = state.peek() {
315 if ch.is_alphabetic() || ch == '-' { state.advance(ch.len_utf8()) } else { break }
316 }
317
318 let rule_name = state.get_text_in((rule_start..state.get_position()).into());
319 let token_kind = match rule_name.as_ref() {
320 "import" => CssTokenType::AtImport,
321 "media" => CssTokenType::AtMedia,
322 "keyframes" => CssTokenType::AtKeyframes,
323 "font-face" => CssTokenType::AtFontFace,
324 "charset" => CssTokenType::AtCharset,
325 "namespace" => CssTokenType::AtNamespace,
326 "supports" => CssTokenType::AtSupports,
327 "page" => CssTokenType::AtPage,
328 "document" => CssTokenType::AtDocument,
329 _ => CssTokenType::AtRule,
330 };
331
332 state.add_token(token_kind, start_pos, state.get_position());
333 true
334 }
335 else {
336 false
337 }
338 }
339
340 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
342 let start_pos = state.get_position();
343
344 if let Some(ch) = state.peek() {
345 let token_kind = match ch {
346 '(' => CssTokenType::LeftParen,
347 ')' => CssTokenType::RightParen,
348 '{' => CssTokenType::LeftBrace,
349 '}' => CssTokenType::RightBrace,
350 '[' => CssTokenType::LeftBracket,
351 ']' => CssTokenType::RightBracket,
352 ',' => CssTokenType::Comma,
353 ';' => CssTokenType::Semicolon,
354 _ => return false,
355 };
356
357 state.advance(1);
358 state.add_token(token_kind, start_pos, state.get_position());
359 true
360 }
361 else {
362 false
363 }
364 }
365
366 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
368 let start_pos = state.get_position();
369
370 if let Some(ch) = state.peek() {
371 let token_kind = match ch {
372 ':' => CssTokenType::Colon,
373 '.' => CssTokenType::Dot,
374 '#' => CssTokenType::Hash,
375 '+' => CssTokenType::Plus,
376 '-' => CssTokenType::Minus,
377 '*' => CssTokenType::Star,
378 '/' => CssTokenType::Slash,
379 '=' => CssTokenType::Equals,
380 '~' => CssTokenType::Tilde,
381 '|' => CssTokenType::Pipe,
382 '^' => CssTokenType::Caret,
383 '$' => CssTokenType::Dollar,
384 '>' => CssTokenType::GreaterThan,
385 _ => return false,
386 };
387
388 state.advance(1);
389 state.add_token(token_kind, start_pos, state.get_position());
390 true
391 }
392 else {
393 false
394 }
395 }
396
397 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
399 while state.not_at_end() {
400 let safe_point = state.get_position();
401
402 if self.skip_whitespace(state) {
404 continue;
405 }
406
407 if self.lex_newline(state) {
408 continue;
409 }
410
411 if self.lex_comment(state) {
412 continue;
413 }
414
415 if self.lex_string(state) {
416 continue;
417 }
418
419 if self.lex_url(state) {
420 continue;
421 }
422
423 if self.lex_color(state) {
424 continue;
425 }
426
427 if self.lex_number(state) {
428 continue;
429 }
430
431 if self.lex_at_rule(state) {
432 continue;
433 }
434
435 if self.lex_identifier(state) {
436 continue;
437 }
438
439 if self.lex_delimiter(state) {
440 continue;
441 }
442
443 if self.lex_operator(state) {
444 continue;
445 }
446
447 let start_pos = state.get_position();
449 if let Some(ch) = state.peek() {
450 state.advance(ch.len_utf8());
451 state.add_token(CssTokenType::Error, start_pos, state.get_position())
452 }
453 else {
454 break;
455 }
456
457 state.advance_if_dead_lock(safe_point)
458 }
459 Ok(())
460 }
461}
462
impl<'config> Lexer<CssLanguage> for CssLexer<'config> {
    /// Runs the lexer over `source` and returns the token output.
    ///
    /// Incremental lexing is not implemented: `_edits` is ignored and the
    /// whole source is re-lexed on every call; the results are handed to
    /// `cache` when finishing.
    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], mut cache: &'a mut impl oak_core::lexer::LexerCache<CssLanguage>) -> LexOutput<CssLanguage> {
        let mut state = LexerState::new(source);
        let result = self.run(&mut state);
        // Only append the EOF token when lexing completed without error.
        if result.is_ok() {
            state.add_eof()
        }
        state.finish_with_cache(result, &mut cache)
    }
}