Skip to main content

oak_sass/lexer/
mod.rs

1use crate::{kind::SassSyntaxKind, language::SassLanguage};
2use oak_core::{
3    Lexer, LexerState, OakError, TextEdit,
4    lexer::{CommentConfig, LexOutput, LexerCache, StringConfig, WhitespaceConfig},
5    source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'s, S> = LexerState<'s, S, SassLanguage>;
10
11static SASS_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static SASS_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: false });
13static SASS_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14static SASS_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
15
16#[derive(Clone, Debug)]
17pub struct SassLexer<'config> {
18    _config: &'config SassLanguage,
19}
20
21impl<'config> Lexer<SassLanguage> for SassLexer<'config> {
22    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<SassLanguage>) -> LexOutput<SassLanguage> {
23        let mut state = LexerState::new(source);
24        let result = self.run(&mut state);
25        if result.is_ok() {
26            state.add_eof();
27        }
28        state.finish_with_cache(result, cache)
29    }
30}
31
32impl<'config> SassLexer<'config> {
33    pub fn new(config: &'config SassLanguage) -> Self {
34        Self { _config: config }
35    }
36
37    /// 主要的词法分析循环
38    fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
39        while state.not_at_end() {
40            let safe_point = state.get_position();
41
42            if self.skip_whitespace(state) {
43                continue;
44            }
45
46            if self.skip_comment(state) {
47                continue;
48            }
49
50            if self.lex_string_literal(state) {
51                continue;
52            }
53
54            if self.lex_number_literal(state) {
55                continue;
56            }
57
58            if self.lex_variable(state) {
59                continue;
60            }
61
62            if self.lex_color_literal(state) {
63                continue;
64            }
65
66            if self.lex_identifier_or_keyword(state) {
67                continue;
68            }
69
70            if self.lex_operators(state) {
71                continue;
72            }
73
74            if self.lex_single_char_tokens(state) {
75                continue;
76            }
77
78            state.advance_if_dead_lock(safe_point);
79        }
80
81        Ok(())
82    }
83
84    /// 跳过空白字符
85    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
86        SASS_WHITESPACE.scan(state, SassSyntaxKind::Whitespace)
87    }
88
89    fn skip_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
90        SASS_COMMENT.scan(state, SassSyntaxKind::LineComment, SassSyntaxKind::BlockComment)
91    }
92
93    fn lex_string_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
94        if SASS_STRING.scan(state, SassSyntaxKind::StringLiteral) {
95            return true;
96        }
97        if SASS_CHAR.scan(state, SassSyntaxKind::StringLiteral) {
98            return true;
99        }
100        false
101    }
102
103    fn lex_number_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
104        let start = state.get_position();
105        let first = match state.current() {
106            Some(c) => c,
107            None => return false,
108        };
109        if !first.is_ascii_digit() {
110            return false;
111        }
112
113        let mut is_float = false;
114        state.advance(first.len_utf8());
115
116        // 读取数字部分
117        while let Some(c) = state.current() {
118            if c.is_ascii_digit() || c == '_' {
119                state.advance(c.len_utf8());
120            }
121            else {
122                break;
123            }
124        }
125
126        // fractional part
127        if state.current() == Some('.') {
128            let n1 = state.source().get_char_at(state.get_position() + 1);
129            if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
130                is_float = true;
131                state.advance(1); // consume '.'
132                while let Some(c) = state.current() {
133                    if c.is_ascii_digit() || c == '_' {
134                        state.advance(c.len_utf8());
135                    }
136                    else {
137                        break;
138                    }
139                }
140            }
141        }
142
143        // 单位后缀 (px, em, rem, %, etc.)
144        while let Some(c) = state.current() {
145            if c.is_ascii_alphabetic() || c == '%' {
146                state.advance(c.len_utf8());
147            }
148            else {
149                break;
150            }
151        }
152
153        let end = state.get_position();
154        state.add_token(if is_float { SassSyntaxKind::FloatLiteral } else { SassSyntaxKind::NumberLiteral }, start, end);
155        true
156    }
157
158    fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
159        let start = state.get_position();
160        let ch = match state.current() {
161            Some(c) => c,
162            None => return false,
163        };
164        if !(ch.is_ascii_alphabetic() || ch == '_' || ch == '-' || ch == '@' || ch == '!') {
165            return false;
166        }
167        state.advance(ch.len_utf8());
168        while let Some(c) = state.current() {
169            if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
170                state.advance(c.len_utf8());
171            }
172            else {
173                break;
174            }
175        }
176        let end = state.get_position();
177        let text = state.source().get_text_in(core::range::Range { start, end });
178        let kind = match text.as_ref() {
179            "@import" => SassSyntaxKind::Import,
180            "@include" => SassSyntaxKind::Include,
181            "@extend" => SassSyntaxKind::Extend,
182            "@mixin" => SassSyntaxKind::Mixin,
183            "@function" => SassSyntaxKind::Function,
184            "@return" => SassSyntaxKind::Return,
185            "@if" => SassSyntaxKind::If,
186            "@else" => SassSyntaxKind::Else,
187            "@elseif" => SassSyntaxKind::ElseIf,
188            "@for" => SassSyntaxKind::For,
189            "@each" => SassSyntaxKind::Each,
190            "@while" => SassSyntaxKind::While,
191            "!default" => SassSyntaxKind::Default,
192            "!important" => SassSyntaxKind::Important,
193            "!optional" => SassSyntaxKind::Optional,
194            "!global" => SassSyntaxKind::Global,
195            "and" => SassSyntaxKind::And,
196            "or" => SassSyntaxKind::Or,
197            "not" => SassSyntaxKind::Not,
198            _ => SassSyntaxKind::Identifier,
199        };
200        state.add_token(kind, start, end);
201        true
202    }
203
204    fn lex_variable<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
205        let start = state.get_position();
206        if state.current() != Some('$') {
207            return false;
208        }
209        state.advance(1);
210
211        // 变量名必须以字母或下划线开头
212        if let Some(ch) = state.current() {
213            if ch.is_ascii_alphabetic() || ch == '_' {
214                state.advance(ch.len_utf8());
215                while let Some(c) = state.current() {
216                    if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
217                        state.advance(c.len_utf8());
218                    }
219                    else {
220                        break;
221                    }
222                }
223                state.add_token(SassSyntaxKind::Variable, start, state.get_position());
224                return true;
225            }
226        }
227        state.set_position(start);
228        false
229    }
230
231    fn lex_color_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
232        let start = state.get_position();
233        if state.current() != Some('#') {
234            return false;
235        }
236        state.advance(1);
237
238        let mut hex_digits = 0;
239        while let Some(c) = state.current() {
240            if c.is_ascii_hexdigit() {
241                state.advance(c.len_utf8());
242                hex_digits += 1;
243            }
244            else {
245                break;
246            }
247        }
248
249        // 有效的颜色值长度: 3, 4, 6, 8
250        if hex_digits == 3 || hex_digits == 4 || hex_digits == 6 || hex_digits == 8 {
251            state.add_token(SassSyntaxKind::ColorLiteral, start, state.get_position());
252            return true;
253        }
254
255        state.set_position(start);
256        false
257    }
258
259    fn lex_operators<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
260        let start = state.get_position();
261
262        // 多字符操作符
263        let patterns: &[(&str, SassSyntaxKind)] = &[("==", SassSyntaxKind::EqEq), ("!=", SassSyntaxKind::Ne), ("<=", SassSyntaxKind::Le), (">=", SassSyntaxKind::Ge)];
264
265        for (pat, kind) in patterns {
266            if state.source().get_text_from(start).as_ref().starts_with(pat) {
267                state.advance(pat.len());
268                state.add_token(*kind, start, state.get_position());
269                return true;
270            }
271        }
272
273        // 单字符操作符
274        if let Some(ch) = state.current() {
275            let kind = match ch {
276                '+' => Some(SassSyntaxKind::Plus),
277                '-' => Some(SassSyntaxKind::Minus),
278                '*' => Some(SassSyntaxKind::Star),
279                '/' => Some(SassSyntaxKind::Slash),
280                '%' => Some(SassSyntaxKind::Percent),
281                '=' => Some(SassSyntaxKind::Eq),
282                '<' => Some(SassSyntaxKind::Lt),
283                '>' => Some(SassSyntaxKind::Gt),
284                _ => None,
285            };
286            if let Some(k) = kind {
287                state.advance(ch.len_utf8());
288                state.add_token(k, start, state.get_position());
289                return true;
290            }
291        }
292        false
293    }
294
295    fn lex_single_char_tokens<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
296        let start = state.get_position();
297        if let Some(ch) = state.current() {
298            let kind = match ch {
299                '(' => SassSyntaxKind::LeftParen,
300                ')' => SassSyntaxKind::RightParen,
301                '{' => SassSyntaxKind::LeftBrace,
302                '}' => SassSyntaxKind::RightBrace,
303                '[' => SassSyntaxKind::LeftBracket,
304                ']' => SassSyntaxKind::RightBracket,
305                ';' => SassSyntaxKind::Semicolon,
306                ':' => SassSyntaxKind::Colon,
307                ',' => SassSyntaxKind::Comma,
308                '.' => SassSyntaxKind::Dot,
309                '#' => SassSyntaxKind::Hash,
310                '$' => SassSyntaxKind::Dollar,
311                '@' => SassSyntaxKind::At,
312                '&' => SassSyntaxKind::Ampersand,
313                '!' => SassSyntaxKind::Exclamation,
314                '?' => SassSyntaxKind::Question,
315                '~' => SassSyntaxKind::Tilde,
316                _ => return false,
317            };
318            state.advance(ch.len_utf8());
319            state.add_token(kind, start, state.get_position());
320            return true;
321        }
322        false
323    }
324}