Skip to main content

oak_scss/lexer/
mod.rs

1use crate::{kind::ScssSyntaxKind, language::ScssLanguage};
2use oak_core::{
3    Lexer, LexerState, OakError, TextEdit,
4    lexer::{CommentConfig, LexOutput, LexerCache, StringConfig, WhitespaceConfig},
5    source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'a, S> = LexerState<'a, S, ScssLanguage>;
10
11static SCSS_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static SCSS_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: true });
13static SCSS_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
15#[derive(Debug, Clone)]
16pub struct ScssLexer<'config> {
17    _config: &'config ScssLanguage,
18}
19
20impl<'config> Lexer<ScssLanguage> for ScssLexer<'config> {
21    fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<ScssLanguage>) -> LexOutput<ScssLanguage> {
22        let mut state = LexerState::new(text);
23        let result = self.run(&mut state);
24        if result.is_ok() {
25            state.add_eof();
26        }
27        state.finish_with_cache(result, cache)
28    }
29}
30
31impl<'config> ScssLexer<'config> {
32    pub fn new(config: &'config ScssLanguage) -> Self {
33        Self { _config: config }
34    }
35
36    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
37        while state.not_at_end() {
38            let safe_point = state.get_position();
39
40            if self.skip_whitespace(state) {
41                continue;
42            }
43
44            if self.lex_newline(state) {
45                continue;
46            }
47
48            if self.skip_comment(state) {
49                continue;
50            }
51
52            if self.lex_string_literal(state) {
53                continue;
54            }
55
56            if self.lex_number_literal(state) {
57                continue;
58            }
59
60            if self.lex_identifier_or_keyword(state) {
61                continue;
62            }
63
64            if self.lex_operators(state) {
65                continue;
66            }
67
68            if self.lex_single_char_tokens(state) {
69                continue;
70            }
71
72            // 错误处理:如果没有匹配任何规则,跳过当前字符并标记为错误
73            let start_pos = state.get_position();
74            if let Some(ch) = state.peek() {
75                state.advance(ch.len_utf8());
76                state.add_token(ScssSyntaxKind::Error, start_pos, state.get_position());
77            }
78
79            state.advance_if_dead_lock(safe_point);
80        }
81
82        Ok(())
83    }
84
85    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
86        SCSS_WHITESPACE.scan(state, ScssSyntaxKind::Whitespace)
87    }
88
89    /// 处理换行
90    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
91        let start_pos = state.get_position();
92
93        if let Some('\n') = state.peek() {
94            state.advance(1);
95            state.add_token(ScssSyntaxKind::Newline, start_pos, state.get_position());
96            true
97        }
98        else if let Some('\r') = state.peek() {
99            state.advance(1);
100            if let Some('\n') = state.peek() {
101                state.advance(1);
102            }
103            state.add_token(ScssSyntaxKind::Newline, start_pos, state.get_position());
104            true
105        }
106        else {
107            false
108        }
109    }
110
111    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
112        SCSS_COMMENT.scan(state, ScssSyntaxKind::Comment, ScssSyntaxKind::Comment)
113    }
114
115    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
116        SCSS_STRING.scan(state, ScssSyntaxKind::StringLiteral)
117    }
118
119    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
120        let start = state.get_position();
121
122        if let Some(first_char) = state.peek() {
123            if first_char.is_ascii_digit() {
124                state.advance(first_char.len_utf8());
125
126                // Continue with digits
127                while let Some(ch) = state.peek() {
128                    if ch.is_ascii_digit() {
129                        state.advance(ch.len_utf8());
130                    }
131                    else {
132                        break;
133                    }
134                }
135
136                // Handle decimal point
137                if state.peek() == Some('.') && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()) {
138                    state.advance(1); // consume '.'
139                    while let Some(ch) = state.peek() {
140                        if ch.is_ascii_digit() {
141                            state.advance(ch.len_utf8());
142                        }
143                        else {
144                            break;
145                        }
146                    }
147                }
148
149                state.add_token(ScssSyntaxKind::IntegerLiteral, start, state.get_position());
150                return true;
151            }
152        }
153        false
154    }
155
156    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
157        let start = state.get_position();
158        let text = state.source().get_text_from(start);
159
160        if let Some(first_char) = text.chars().next() {
161            if first_char.is_alphabetic() || first_char == '_' || first_char == '$' {
162                let mut len = first_char.len_utf8();
163
164                let mut chars = text.chars().skip(1);
165                while let Some(ch) = chars.next() {
166                    if ch.is_alphanumeric() || ch == '_' || ch == '-' {
167                        len += ch.len_utf8();
168                    }
169                    else {
170                        break;
171                    }
172                }
173
174                let word = &text[..len];
175                let kind = self.keyword_kind(word).unwrap_or(ScssSyntaxKind::Identifier);
176                state.advance(len);
177                state.add_token(kind, start, state.get_position());
178                return true;
179            }
180        }
181        false
182    }
183
184    fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
185        let start = state.get_position();
186        let text = state.source().get_text_from(start);
187
188        // Two-character operators
189        if text.len() >= 2 {
190            let two_char = &text[..2];
191            if let Some(kind) = self.operator_kind(two_char) {
192                state.advance(2);
193                state.add_token(kind, start, state.get_position());
194                return true;
195            }
196        }
197
198        // Single-character operators
199        if let Some(first_char) = text.chars().next() {
200            let single_char = &text[..first_char.len_utf8()];
201            if let Some(kind) = self.operator_kind(single_char) {
202                state.advance(first_char.len_utf8());
203                state.add_token(kind, start, state.get_position());
204                return true;
205            }
206        }
207
208        false
209    }
210
211    fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
212        let start = state.get_position();
213        let text = state.source().get_text_from(start);
214
215        if let Some(first_char) = text.chars().next() {
216            let single_char = &text[..first_char.len_utf8()];
217            if let Some(kind) = self.single_char_kind(single_char) {
218                state.advance(first_char.len_utf8());
219                state.add_token(kind, start, state.get_position());
220                return true;
221            }
222        }
223
224        false
225    }
226
227    fn keyword_kind(&self, text: &str) -> Option<ScssSyntaxKind> {
228        match text {
229            "import" => Some(ScssSyntaxKind::Import),
230            "include" => Some(ScssSyntaxKind::Include),
231            "mixin" => Some(ScssSyntaxKind::Mixin),
232            "function" => Some(ScssSyntaxKind::Function),
233            "return" => Some(ScssSyntaxKind::Return),
234            "if" => Some(ScssSyntaxKind::If),
235            "else" => Some(ScssSyntaxKind::Else),
236            "for" => Some(ScssSyntaxKind::For),
237            "while" => Some(ScssSyntaxKind::While),
238            "each" => Some(ScssSyntaxKind::Each),
239            "in" => Some(ScssSyntaxKind::In),
240            "true" => Some(ScssSyntaxKind::True),
241            "false" => Some(ScssSyntaxKind::False),
242            "null" => Some(ScssSyntaxKind::Null),
243            _ => None,
244        }
245    }
246
247    fn operator_kind(&self, text: &str) -> Option<ScssSyntaxKind> {
248        match text {
249            "==" => Some(ScssSyntaxKind::EqEq),
250            "!=" => Some(ScssSyntaxKind::Ne),
251            "<=" => Some(ScssSyntaxKind::Le),
252            ">=" => Some(ScssSyntaxKind::Ge),
253            "&&" => Some(ScssSyntaxKind::AndAnd),
254            "||" => Some(ScssSyntaxKind::OrOr),
255            "=" => Some(ScssSyntaxKind::Eq),
256            "<" => Some(ScssSyntaxKind::Lt),
257            ">" => Some(ScssSyntaxKind::Gt),
258            "&" => Some(ScssSyntaxKind::And),
259            "|" => Some(ScssSyntaxKind::Or),
260            "^" => Some(ScssSyntaxKind::Xor),
261            "+" => Some(ScssSyntaxKind::Plus),
262            "-" => Some(ScssSyntaxKind::Minus),
263            "*" => Some(ScssSyntaxKind::Star),
264            "/" => Some(ScssSyntaxKind::Slash),
265            "%" => Some(ScssSyntaxKind::Percent),
266            "!" => Some(ScssSyntaxKind::Bang),
267            _ => None,
268        }
269    }
270
271    fn single_char_kind(&self, text: &str) -> Option<ScssSyntaxKind> {
272        match text {
273            "(" => Some(ScssSyntaxKind::LeftParen),
274            ")" => Some(ScssSyntaxKind::RightParen),
275            "{" => Some(ScssSyntaxKind::LeftBrace),
276            "}" => Some(ScssSyntaxKind::RightBrace),
277            "[" => Some(ScssSyntaxKind::LeftBracket),
278            "]" => Some(ScssSyntaxKind::RightBracket),
279            ";" => Some(ScssSyntaxKind::Semicolon),
280            ":" => Some(ScssSyntaxKind::Colon),
281            "," => Some(ScssSyntaxKind::Comma),
282            "." => Some(ScssSyntaxKind::Dot),
283            "#" => Some(ScssSyntaxKind::Hash),
284            "@" => Some(ScssSyntaxKind::At),
285            "$" => Some(ScssSyntaxKind::Dollar),
286            _ => None,
287        }
288    }
289}