Skip to main content

oak_css/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2pub mod token_type;
3use crate::language::CssLanguage;
4use oak_core::{Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
5pub use token_type::CssTokenType;
6
/// Shorthand for the shared lexer state specialized to [`CssLanguage`].
type State<'s, S> = LexerState<'s, S, CssLanguage>;
8
/// Lexer for the CSS language.
///
/// Produces a flat token stream (trivia such as whitespace, newlines and
/// comments included) from CSS source text; driven through the [`Lexer`]
/// trait implementation below.
pub struct CssLexer<'config> {
    /// Language configuration (stored but not consulted by the current rules).
    _config: &'config CssLanguage,
}
14
15impl<'config> CssLexer<'config> {
16    /// Creates a new `CssLexer` with the given language configuration.
17    pub fn new(config: &'config CssLanguage) -> Self {
18        Self { _config: config }
19    }
20
21    /// Skips whitespace characters.
22    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
23        let start_pos = state.get_position();
24
25        while let Some(ch) = state.peek() {
26            if ch == ' ' || ch == '\t' { state.advance(ch.len_utf8()) } else { break }
27        }
28
29        if state.get_position() > start_pos {
30            state.add_token(CssTokenType::Whitespace, start_pos, state.get_position());
31            true
32        }
33        else {
34            false
35        }
36    }
37
38    /// Handles newline characters.
39    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
40        let start_pos = state.get_position();
41
42        if let Some('\n') = state.peek() {
43            state.advance(1);
44            state.add_token(CssTokenType::Newline, start_pos, state.get_position());
45            true
46        }
47        else if let Some('\r') = state.peek() {
48            state.advance(1);
49            if let Some('\n') = state.peek() {
50                state.advance(1)
51            }
52            state.add_token(CssTokenType::Newline, start_pos, state.get_position());
53            true
54        }
55        else {
56            false
57        }
58    }
59
60    /// Handles CSS comments (`/* ... */`).
61    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
62        let start_pos = state.get_position();
63
64        if let Some('/') = state.peek() {
65            if let Some('*') = state.peek_next_n(1) {
66                state.advance(2); // Skip /*
67
68                while let Some(ch) = state.peek() {
69                    if ch == '*' && state.peek_next_n(1) == Some('/') {
70                        state.advance(2); // Skip */
71                        break;
72                    }
73                    state.advance(ch.len_utf8())
74                }
75
76                state.add_token(CssTokenType::Comment, start_pos, state.get_position());
77                true
78            }
79            else {
80                false
81            }
82        }
83        else {
84            false
85        }
86    }
87
88    /// Handles string literals (both single and double quoted).
89    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
90        let start_pos = state.get_position();
91
92        if let Some(quote) = state.peek() {
93            if quote == '"' || quote == '\'' {
94                state.advance(1); // Skip opening quote
95
96                while let Some(ch) = state.peek() {
97                    if ch == quote {
98                        state.advance(1); // Skip closing quote
99                        break;
100                    }
101                    else if ch == '\\' {
102                        state.advance(1); // Skip escape character
103                        if state.peek().is_some() {
104                            state.advance(1)
105                        }
106                    }
107                    else {
108                        state.advance(ch.len_utf8())
109                    }
110                }
111
112                state.add_token(CssTokenType::StringLiteral, start_pos, state.get_position());
113                true
114            }
115            else {
116                false
117            }
118        }
119        else {
120            false
121        }
122    }
123
124    /// Handles CSS URLs (`url(...)`).
125    fn lex_url<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
126        let start_pos = state.get_position();
127
128        if let Some('u') = state.peek() {
129            if state.peek_next_n(1) == Some('r') && state.peek_next_n(2) == Some('l') && state.peek_next_n(3) == Some('(') {
130                state.advance(4); // Skip url(
131
132                // Skip whitespace
133                while let Some(ch) = state.peek() {
134                    if ch.is_whitespace() { state.advance(ch.len_utf8()) } else { break }
135                }
136
137                // Check for quoted or unquoted URL
138                if let Some(quote) = state.peek() {
139                    if quote == '"' || quote == '\'' {
140                        state.advance(1);
141                        while let Some(ch) = state.peek() {
142                            if ch == quote {
143                                state.advance(1);
144                                break;
145                            }
146                            else if ch == '\\' {
147                                state.advance(1);
148                                if state.peek().is_some() {
149                                    state.advance(1)
150                                }
151                            }
152                            else {
153                                state.advance(ch.len_utf8())
154                            }
155                        }
156                    }
157                    else {
158                        while let Some(ch) = state.peek() {
159                            if ch == ')' || ch.is_whitespace() {
160                                break;
161                            }
162                            state.advance(ch.len_utf8())
163                        }
164                    }
165                }
166
167                // Skip whitespace
168                while let Some(ch) = state.peek() {
169                    if ch.is_whitespace() { state.advance(ch.len_utf8()) } else { break }
170                }
171
172                // Skip closing )
173                if let Some(')') = state.peek() {
174                    state.advance(1)
175                }
176
177                state.add_token(CssTokenType::UrlLiteral, start_pos, state.get_position());
178                true
179            }
180            else {
181                false
182            }
183        }
184        else {
185            false
186        }
187    }
188
189    /// Handles color literals (e.g., `#fff`, `#ffffff`).
190    fn lex_color<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
191        let start_pos = state.get_position();
192
193        if let Some('#') = state.peek() {
194            state.advance(1); // Skip #
195
196            let mut count = 0;
197            while let Some(ch) = state.peek() {
198                if ch.is_ascii_hexdigit() {
199                    state.advance(1);
200                    count += 1
201                }
202                else {
203                    break;
204                }
205            }
206
207            if count == 3 || count == 4 || count == 6 || count == 8 {
208                state.add_token(CssTokenType::ColorLiteral, start_pos, state.get_position());
209                true
210            }
211            else {
212                // Not a valid color, but we'll treat it as a hash + something else
213                // This is a simplification for the lexer
214                state.add_token(CssTokenType::Hash, start_pos, start_pos + 1);
215                state.set_position(start_pos + 1);
216                true
217            }
218        }
219        else {
220            false
221        }
222    }
223
224    /// Handles number literals and units (e.g., `10px`, `1.5em`, `100%`).
225    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
226        let start_pos = state.get_position();
227
228        let mut has_digits = false;
229        if let Some(ch) = state.peek() {
230            if ch == '+' || ch == '-' {
231                state.advance(1)
232            }
233        }
234
235        while let Some(ch) = state.peek() {
236            if ch.is_ascii_digit() {
237                state.advance(1);
238                has_digits = true
239            }
240            else {
241                break;
242            }
243        }
244
245        if let Some('.') = state.peek() {
246            if let Some(next_ch) = state.peek_next_n(1) {
247                if next_ch.is_ascii_digit() {
248                    state.advance(1); // Skip .
249                    while let Some(ch) = state.peek() {
250                        if ch.is_ascii_digit() {
251                            state.advance(1);
252                            has_digits = true
253                        }
254                        else {
255                            break;
256                        }
257                    }
258                }
259            }
260        }
261
262        if has_digits {
263            // Check for units
264            let unit_start = state.get_position();
265            while let Some(ch) = state.peek() {
266                if ch.is_alphabetic() || ch == '%' { state.advance(ch.len_utf8()) } else { break }
267            }
268
269            if state.get_position() > unit_start {
270                // We have a number with a unit
271                state.add_token(CssTokenType::NumberLiteral, start_pos, state.get_position())
272            }
273            else {
274                state.add_token(CssTokenType::NumberLiteral, start_pos, state.get_position())
275            }
276            true
277        }
278        else {
279            state.set_position(start_pos);
280            false
281        }
282    }
283
284    /// Handles identifiers (e.g., property names, selectors).
285    fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
286        let start_pos = state.get_position();
287
288        if let Some(ch) = state.peek() {
289            if ch.is_alphabetic() || ch == '_' || ch == '-' {
290                while let Some(ch) = state.peek() {
291                    if ch.is_alphanumeric() || ch == '_' || ch == '-' { state.advance(ch.len_utf8()) } else { break }
292                }
293
294                state.add_token(CssTokenType::Identifier, start_pos, state.get_position());
295                true
296            }
297            else {
298                false
299            }
300        }
301        else {
302            false
303        }
304    }
305
306    /// Handles CSS at-rules (e.g., `@import`, `@media`).
307    fn lex_at_rule<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
308        let start_pos = state.get_position();
309
310        if let Some('@') = state.peek() {
311            state.advance(1); // Skip @
312
313            let rule_start = state.get_position();
314            while let Some(ch) = state.peek() {
315                if ch.is_alphabetic() || ch == '-' { state.advance(ch.len_utf8()) } else { break }
316            }
317
318            let rule_name = state.get_text_in((rule_start..state.get_position()).into());
319            let token_kind = match rule_name.as_ref() {
320                "import" => CssTokenType::AtImport,
321                "media" => CssTokenType::AtMedia,
322                "keyframes" => CssTokenType::AtKeyframes,
323                "font-face" => CssTokenType::AtFontFace,
324                "charset" => CssTokenType::AtCharset,
325                "namespace" => CssTokenType::AtNamespace,
326                "supports" => CssTokenType::AtSupports,
327                "page" => CssTokenType::AtPage,
328                "document" => CssTokenType::AtDocument,
329                _ => CssTokenType::AtRule,
330            };
331
332            state.add_token(token_kind, start_pos, state.get_position());
333            true
334        }
335        else {
336            false
337        }
338    }
339
340    /// Handles delimiters (e.g., `(`, `)`, `{`, `}`, `,`, `;`).
341    fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
342        let start_pos = state.get_position();
343
344        if let Some(ch) = state.peek() {
345            let token_kind = match ch {
346                '(' => CssTokenType::LeftParen,
347                ')' => CssTokenType::RightParen,
348                '{' => CssTokenType::LeftBrace,
349                '}' => CssTokenType::RightBrace,
350                '[' => CssTokenType::LeftBracket,
351                ']' => CssTokenType::RightBracket,
352                ',' => CssTokenType::Comma,
353                ';' => CssTokenType::Semicolon,
354                _ => return false,
355            };
356
357            state.advance(1);
358            state.add_token(token_kind, start_pos, state.get_position());
359            true
360        }
361        else {
362            false
363        }
364    }
365
366    /// Handles operators (e.g., `:`, `.`, `>`, `+`, `~`, `*`, `/`).
367    fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
368        let start_pos = state.get_position();
369
370        if let Some(ch) = state.peek() {
371            let token_kind = match ch {
372                ':' => CssTokenType::Colon,
373                '.' => CssTokenType::Dot,
374                '#' => CssTokenType::Hash,
375                '+' => CssTokenType::Plus,
376                '-' => CssTokenType::Minus,
377                '*' => CssTokenType::Star,
378                '/' => CssTokenType::Slash,
379                '=' => CssTokenType::Equals,
380                '~' => CssTokenType::Tilde,
381                '|' => CssTokenType::Pipe,
382                '^' => CssTokenType::Caret,
383                '$' => CssTokenType::Dollar,
384                '>' => CssTokenType::GreaterThan,
385                _ => return false,
386            };
387
388            state.advance(1);
389            state.add_token(token_kind, start_pos, state.get_position());
390            true
391        }
392        else {
393            false
394        }
395    }
396
397    /// Main entry point for the lexer.
398    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
399        while state.not_at_end() {
400            let safe_point = state.get_position();
401
402            // Try various lexing rules
403            if self.skip_whitespace(state) {
404                continue;
405            }
406
407            if self.lex_newline(state) {
408                continue;
409            }
410
411            if self.lex_comment(state) {
412                continue;
413            }
414
415            if self.lex_string(state) {
416                continue;
417            }
418
419            if self.lex_url(state) {
420                continue;
421            }
422
423            if self.lex_color(state) {
424                continue;
425            }
426
427            if self.lex_number(state) {
428                continue;
429            }
430
431            if self.lex_at_rule(state) {
432                continue;
433            }
434
435            if self.lex_identifier(state) {
436                continue;
437            }
438
439            if self.lex_delimiter(state) {
440                continue;
441            }
442
443            if self.lex_operator(state) {
444                continue;
445            }
446
447            // If no rules match, skip the current character and mark as error
448            let start_pos = state.get_position();
449            if let Some(ch) = state.peek() {
450                state.advance(ch.len_utf8());
451                state.add_token(CssTokenType::Error, start_pos, state.get_position())
452            }
453            else {
454                break;
455            }
456
457            state.advance_if_dead_lock(safe_point)
458        }
459        Ok(())
460    }
461}
462
impl<'config> Lexer<CssLanguage> for CssLexer<'config> {
    /// Tokenizes the source code into a stream of CSS tokens.
    ///
    /// `_edits` is currently ignored — there is no incremental relexing; the
    /// whole source is lexed from scratch and the output is produced through
    /// `cache` via `finish_with_cache`.
    fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], mut cache: &'a mut impl oak_core::lexer::LexerCache<CssLanguage>) -> LexOutput<CssLanguage> {
        let mut state = LexerState::new(source);
        let result = self.run(&mut state);
        // Only append the EOF marker on a clean run; failures surface via `result`.
        if result.is_ok() {
            state.add_eof()
        }
        state.finish_with_cache(result, &mut cache)
    }
}
473}