1#![doc = include_str!("readme.md")]
2pub mod token_type;
4use crate::language::CssLanguage;
5use oak_core::{Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
6pub use token_type::CssTokenType;
7
/// Shorthand for the framework lexer state specialised to the CSS language.
type State<'s, S> = LexerState<'s, S, CssLanguage>;
9
/// Hand-written lexer for CSS source text.
///
/// The lexer itself is stateless between runs; all per-run state lives in
/// the `LexerState` passed to each method.
pub struct CssLexer<'config> {
    // Borrowed language configuration. NOTE(review): not read by any of the
    // visible lexing routines — presumably kept for parity with other
    // language lexers; confirm before removing.
    config: &'config CssLanguage,
}
15
16impl<'config> CssLexer<'config> {
    /// Creates a lexer borrowing the given language configuration.
    pub fn new(config: &'config CssLanguage) -> Self {
        Self { config }
    }
21
22 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
24 let start_pos = state.get_position();
25
26 while let Some(ch) = state.peek() {
27 if ch == ' ' || ch == '\t' { state.advance(ch.len_utf8()) } else { break }
28 }
29
30 if state.get_position() > start_pos {
31 state.add_token(CssTokenType::Whitespace, start_pos, state.get_position());
32 true
33 }
34 else {
35 false
36 }
37 }
38
39 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
41 let start_pos = state.get_position();
42
43 if let Some('\n') = state.peek() {
44 state.advance(1);
45 state.add_token(CssTokenType::Newline, start_pos, state.get_position());
46 true
47 }
48 else if let Some('\r') = state.peek() {
49 state.advance(1);
50 if let Some('\n') = state.peek() {
51 state.advance(1)
52 }
53 state.add_token(CssTokenType::Newline, start_pos, state.get_position());
54 true
55 }
56 else {
57 false
58 }
59 }
60
61 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
63 let start_pos = state.get_position();
64
65 if let Some('/') = state.peek() {
66 if let Some('*') = state.peek_next_n(1) {
67 state.advance(2); while let Some(ch) = state.peek() {
70 if ch == '*' && state.peek_next_n(1) == Some('/') {
71 state.advance(2); break;
73 }
74 state.advance(ch.len_utf8())
75 }
76
77 state.add_token(CssTokenType::Comment, start_pos, state.get_position());
78 true
79 }
80 else {
81 false
82 }
83 }
84 else {
85 false
86 }
87 }
88
89 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
91 let start_pos = state.get_position();
92
93 if let Some(quote) = state.peek() {
94 if quote == '"' || quote == '\'' {
95 state.advance(1); while let Some(ch) = state.peek() {
98 if ch == quote {
99 state.advance(1); break;
101 }
102 else if ch == '\\' {
103 state.advance(1); if state.peek().is_some() {
105 state.advance(1)
106 }
107 }
108 else {
109 state.advance(ch.len_utf8())
110 }
111 }
112
113 state.add_token(CssTokenType::StringLiteral, start_pos, state.get_position());
114 true
115 }
116 else {
117 false
118 }
119 }
120 else {
121 false
122 }
123 }
124
125 fn lex_url<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
127 let start_pos = state.get_position();
128
129 if let Some('u') = state.peek() {
130 if state.peek_next_n(1) == Some('r') && state.peek_next_n(2) == Some('l') && state.peek_next_n(3) == Some('(') {
131 state.advance(4); while let Some(ch) = state.peek() {
135 if ch.is_whitespace() { state.advance(ch.len_utf8()) } else { break }
136 }
137
138 if let Some(quote) = state.peek() {
140 if quote == '"' || quote == '\'' {
141 state.advance(1);
142 while let Some(ch) = state.peek() {
143 if ch == quote {
144 state.advance(1);
145 break;
146 }
147 else if ch == '\\' {
148 state.advance(1);
149 if state.peek().is_some() {
150 state.advance(1)
151 }
152 }
153 else {
154 state.advance(ch.len_utf8())
155 }
156 }
157 }
158 else {
159 while let Some(ch) = state.peek() {
160 if ch == ')' || ch.is_whitespace() {
161 break;
162 }
163 state.advance(ch.len_utf8())
164 }
165 }
166 }
167
168 while let Some(ch) = state.peek() {
170 if ch.is_whitespace() { state.advance(ch.len_utf8()) } else { break }
171 }
172
173 if let Some(')') = state.peek() {
175 state.advance(1)
176 }
177
178 state.add_token(CssTokenType::UrlLiteral, start_pos, state.get_position());
179 true
180 }
181 else {
182 false
183 }
184 }
185 else {
186 false
187 }
188 }
189
190 fn lex_color<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
192 let start_pos = state.get_position();
193
194 if let Some('#') = state.peek() {
195 state.advance(1); let mut count = 0;
198 while let Some(ch) = state.peek() {
199 if ch.is_ascii_hexdigit() {
200 state.advance(1);
201 count += 1
202 }
203 else {
204 break;
205 }
206 }
207
208 if count == 3 || count == 4 || count == 6 || count == 8 {
209 state.add_token(CssTokenType::ColorLiteral, start_pos, state.get_position());
210 true
211 }
212 else {
213 state.add_token(CssTokenType::Hash, start_pos, start_pos + 1);
216 state.set_position(start_pos + 1);
217 true
218 }
219 }
220 else {
221 false
222 }
223 }
224
225 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
227 let start_pos = state.get_position();
228
229 let mut has_digits = false;
230 if let Some(ch) = state.peek() {
231 if ch == '+' || ch == '-' {
232 state.advance(1)
233 }
234 }
235
236 while let Some(ch) = state.peek() {
237 if ch.is_ascii_digit() {
238 state.advance(1);
239 has_digits = true
240 }
241 else {
242 break;
243 }
244 }
245
246 if let Some('.') = state.peek() {
247 if let Some(next_ch) = state.peek_next_n(1) {
248 if next_ch.is_ascii_digit() {
249 state.advance(1); while let Some(ch) = state.peek() {
251 if ch.is_ascii_digit() {
252 state.advance(1);
253 has_digits = true
254 }
255 else {
256 break;
257 }
258 }
259 }
260 }
261 }
262
263 if has_digits {
264 let unit_start = state.get_position();
266 while let Some(ch) = state.peek() {
267 if ch.is_alphabetic() || ch == '%' { state.advance(ch.len_utf8()) } else { break }
268 }
269
270 if state.get_position() > unit_start {
271 state.add_token(CssTokenType::NumberLiteral, start_pos, state.get_position())
273 }
274 else {
275 state.add_token(CssTokenType::NumberLiteral, start_pos, state.get_position())
276 }
277 true
278 }
279 else {
280 state.set_position(start_pos);
281 false
282 }
283 }
284
285 fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
287 let start_pos = state.get_position();
288
289 if let Some(ch) = state.peek() {
290 if ch.is_alphabetic() || ch == '_' || ch == '-' {
291 while let Some(ch) = state.peek() {
292 if ch.is_alphanumeric() || ch == '_' || ch == '-' { state.advance(ch.len_utf8()) } else { break }
293 }
294
295 state.add_token(CssTokenType::Identifier, start_pos, state.get_position());
296 true
297 }
298 else {
299 false
300 }
301 }
302 else {
303 false
304 }
305 }
306
307 fn lex_at_rule<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
309 let start_pos = state.get_position();
310
311 if let Some('@') = state.peek() {
312 state.advance(1); let rule_start = state.get_position();
315 while let Some(ch) = state.peek() {
316 if ch.is_alphabetic() || ch == '-' { state.advance(ch.len_utf8()) } else { break }
317 }
318
319 let rule_name = state.get_text_in((rule_start..state.get_position()).into());
320 let token_kind = match rule_name.as_ref() {
321 "import" => CssTokenType::AtImport,
322 "media" => CssTokenType::AtMedia,
323 "keyframes" => CssTokenType::AtKeyframes,
324 "font-face" => CssTokenType::AtFontFace,
325 "charset" => CssTokenType::AtCharset,
326 "namespace" => CssTokenType::AtNamespace,
327 "supports" => CssTokenType::AtSupports,
328 "page" => CssTokenType::AtPage,
329 "document" => CssTokenType::AtDocument,
330 _ => CssTokenType::AtRule,
331 };
332
333 state.add_token(token_kind, start_pos, state.get_position());
334 true
335 }
336 else {
337 false
338 }
339 }
340
341 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
343 let start_pos = state.get_position();
344
345 if let Some(ch) = state.peek() {
346 let token_kind = match ch {
347 '(' => CssTokenType::LeftParen,
348 ')' => CssTokenType::RightParen,
349 '{' => CssTokenType::LeftBrace,
350 '}' => CssTokenType::RightBrace,
351 '[' => CssTokenType::LeftBracket,
352 ']' => CssTokenType::RightBracket,
353 ',' => CssTokenType::Comma,
354 ';' => CssTokenType::Semicolon,
355 _ => return false,
356 };
357
358 state.advance(1);
359 state.add_token(token_kind, start_pos, state.get_position());
360 true
361 }
362 else {
363 false
364 }
365 }
366
367 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
369 let start_pos = state.get_position();
370
371 if let Some(ch) = state.peek() {
372 let token_kind = match ch {
373 ':' => CssTokenType::Colon,
374 '.' => CssTokenType::Dot,
375 '#' => CssTokenType::Hash,
376 '+' => CssTokenType::Plus,
377 '-' => CssTokenType::Minus,
378 '*' => CssTokenType::Star,
379 '/' => CssTokenType::Slash,
380 '=' => CssTokenType::Equals,
381 '~' => CssTokenType::Tilde,
382 '|' => CssTokenType::Pipe,
383 '^' => CssTokenType::Caret,
384 '$' => CssTokenType::Dollar,
385 '>' => CssTokenType::GreaterThan,
386 _ => return false,
387 };
388
389 state.advance(1);
390 state.add_token(token_kind, start_pos, state.get_position());
391 true
392 }
393 else {
394 false
395 }
396 }
397
398 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
400 while state.not_at_end() {
401 let safe_point = state.get_position();
402
403 if self.skip_whitespace(state) {
405 continue;
406 }
407
408 if self.lex_newline(state) {
409 continue;
410 }
411
412 if self.lex_comment(state) {
413 continue;
414 }
415
416 if self.lex_string(state) {
417 continue;
418 }
419
420 if self.lex_url(state) {
421 continue;
422 }
423
424 if self.lex_color(state) {
425 continue;
426 }
427
428 if self.lex_number(state) {
429 continue;
430 }
431
432 if self.lex_at_rule(state) {
433 continue;
434 }
435
436 if self.lex_identifier(state) {
437 continue;
438 }
439
440 if self.lex_delimiter(state) {
441 continue;
442 }
443
444 if self.lex_operator(state) {
445 continue;
446 }
447
448 let start_pos = state.get_position();
450 if let Some(ch) = state.peek() {
451 state.advance(ch.len_utf8());
452 state.add_token(CssTokenType::Error, start_pos, state.get_position())
453 }
454 else {
455 break;
456 }
457
458 state.advance_if_dead_lock(safe_point)
459 }
460 Ok(())
461 }
462}
463
impl<'config> Lexer<CssLanguage> for CssLexer<'config> {
    /// Runs the lexer over `source` and returns the full token stream.
    ///
    /// NOTE(review): `_edits` is ignored — lexing is always from scratch;
    /// presumably incremental relexing is handled by the cache layer or is
    /// not yet implemented. Confirm against the `Lexer` trait contract.
    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], mut cache: &'a mut impl oak_core::lexer::LexerCache<CssLanguage>) -> LexOutput<CssLanguage> {
        let mut state = LexerState::new(source);
        let result = self.run(&mut state);
        // Only append the EOF token when the run completed without error.
        if result.is_ok() {
            state.add_eof()
        }
        state.finish_with_cache(result, &mut cache)
    }
}