Skip to main content

oak_twig/lexer/
mod.rs

1use crate::{kind::TwigSyntaxKind, language::TwigLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
/// Lexer for Twig template source text.
///
/// Borrows a [`TwigLanguage`] configuration for the `'config` lifetime.
#[derive(Clone, Debug)]
pub struct TwigLexer<'config> {
    /// Language configuration. It is stored at construction time; none of the
    /// lexing code visible in this file reads it (hence the leading underscore).
    _config: &'config TwigLanguage,
}
9
/// Shorthand for the `oak_core` [`LexerState`] specialised to [`TwigLanguage`].
type State<'a, S> = LexerState<'a, S, TwigLanguage>;
11
12impl<'config> Lexer<TwigLanguage> for TwigLexer<'config> {
13    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<TwigLanguage>) -> LexOutput<TwigLanguage> {
14        let mut state = LexerState::new(source);
15        let result = self.run(&mut state);
16        if result.is_ok() {
17            state.add_eof();
18        }
19        state.finish_with_cache(result, cache)
20    }
21}
22
23impl<'config> TwigLexer<'config> {
24    /// 创建新的 Twig 词法分析器
25    pub fn new(config: &'config TwigLanguage) -> Self {
26        Self { _config: config }
27    }
28    fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
29        while state.not_at_end() {
30            let safe_point = state.get_position();
31
32            if self.skip_whitespace(state) {
33                continue;
34            }
35
36            if self.skip_comment(state) {
37                continue;
38            }
39
40            if self.lex_string(state) {
41                continue;
42            }
43
44            if self.lex_number(state) {
45                continue;
46            }
47
48            if self.lex_punctuation(state) {
49                continue;
50            }
51
52            if self.lex_identifier(state) {
53                continue;
54            }
55
56            state.advance_if_dead_lock(safe_point);
57        }
58
59        Ok(())
60    }
61
62    fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
63        let start = state.get_position();
64        let mut found = false;
65
66        while let Some(ch) = state.peek() {
67            if ch.is_whitespace() {
68                state.advance(ch.len_utf8());
69                found = true;
70            }
71            else {
72                break;
73            }
74        }
75
76        if found {
77            state.add_token(TwigSyntaxKind::Whitespace, start, state.get_position());
78        }
79
80        found
81    }
82
83    fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
84        let start = state.get_position();
85        if state.consume_if_starts_with("{#") {
86            while state.not_at_end() {
87                if state.consume_if_starts_with("#}") {
88                    break;
89                }
90                if let Some(ch) = state.peek() {
91                    state.advance(ch.len_utf8());
92                }
93            }
94            state.add_token(TwigSyntaxKind::Comment, start, state.get_position());
95            return true;
96        }
97        false
98    }
99
100    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
101        let start = state.get_position();
102
103        if let Some(quote) = state.peek() {
104            if quote == '"' || quote == '\'' {
105                state.advance(1);
106
107                while let Some(ch) = state.peek() {
108                    if ch == quote {
109                        state.advance(1);
110                        break;
111                    }
112                    else if ch == '\\' {
113                        state.advance(1);
114                        if let Some(_) = state.peek() {
115                            state.advance(1);
116                        }
117                    }
118                    else {
119                        state.advance(ch.len_utf8());
120                    }
121                }
122
123                state.add_token(TwigSyntaxKind::String, start, state.get_position());
124                return true;
125            }
126        }
127
128        false
129    }
130
131    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
132        let start = state.get_position();
133
134        if let Some(ch) = state.peek() {
135            if ch.is_ascii_digit() {
136                state.advance(1);
137
138                while let Some(ch) = state.peek() {
139                    if ch.is_ascii_digit() || ch == '.' {
140                        state.advance(1);
141                    }
142                    else {
143                        break;
144                    }
145                }
146
147                state.add_token(TwigSyntaxKind::Number, start, state.get_position());
148                return true;
149            }
150        }
151
152        false
153    }
154
155    fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
156        let start = state.get_position();
157        let rest = state.rest();
158
159        // 双字符操作符
160        if rest.starts_with("{{") {
161            state.advance(2);
162            state.add_token(TwigSyntaxKind::DoubleLeftBrace, start, state.get_position());
163            return true;
164        }
165        if rest.starts_with("}}") {
166            state.advance(2);
167            state.add_token(TwigSyntaxKind::DoubleRightBrace, start, state.get_position());
168            return true;
169        }
170        if rest.starts_with("{%") {
171            state.advance(2);
172            state.add_token(TwigSyntaxKind::LeftBracePercent, start, state.get_position());
173            return true;
174        }
175        if rest.starts_with("%}") {
176            state.advance(2);
177            state.add_token(TwigSyntaxKind::PercentRightBrace, start, state.get_position());
178            return true;
179        }
180
181        // 单字符操作符
182        if let Some(ch) = state.peek() {
183            let kind = match ch {
184                '{' => TwigSyntaxKind::LeftBrace,
185                '}' => TwigSyntaxKind::RightBrace,
186                '(' => TwigSyntaxKind::LeftParen,
187                ')' => TwigSyntaxKind::RightParen,
188                '[' => TwigSyntaxKind::LeftBracket,
189                ']' => TwigSyntaxKind::RightBracket,
190                ',' => TwigSyntaxKind::Comma,
191                '.' => TwigSyntaxKind::Dot,
192                ':' => TwigSyntaxKind::Colon,
193                ';' => TwigSyntaxKind::Semicolon,
194                '|' => TwigSyntaxKind::Pipe,
195                '=' => TwigSyntaxKind::Eq,
196                '+' => TwigSyntaxKind::Plus,
197                '-' => TwigSyntaxKind::Minus,
198                '*' => TwigSyntaxKind::Star,
199                '/' => TwigSyntaxKind::Slash,
200                '%' => TwigSyntaxKind::Percent,
201                '!' => TwigSyntaxKind::Bang,
202                '?' => TwigSyntaxKind::Question,
203                '<' => TwigSyntaxKind::Lt,
204                '>' => TwigSyntaxKind::Gt,
205                '&' => TwigSyntaxKind::Amp,
206                '^' => TwigSyntaxKind::Caret,
207                '~' => TwigSyntaxKind::Tilde,
208                _ => return false,
209            };
210
211            state.advance(1);
212            state.add_token(kind, start, state.get_position());
213            return true;
214        }
215
216        false
217    }
218
219    fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
220        let start = state.get_position();
221
222        if let Some(ch) = state.peek() {
223            if ch.is_ascii_alphabetic() || ch == '_' {
224                state.advance(ch.len_utf8());
225
226                while let Some(ch) = state.peek() {
227                    if ch.is_ascii_alphanumeric() || ch == '_' {
228                        state.advance(ch.len_utf8());
229                    }
230                    else {
231                        break;
232                    }
233                }
234
235                let end = state.get_position();
236                let text = state.get_text_in((start..end).into());
237
238                // 检查是否为布尔关键字
239                let kind = match text.as_ref() {
240                    "true" | "false" => TwigSyntaxKind::Boolean,
241                    _ => TwigSyntaxKind::Identifier,
242                };
243                state.add_token(kind, start, end);
244                return true;
245            }
246        }
247        false
248    }
249}