oak_graphql/lexer/
mod.rs

1use crate::{kind::GraphQLSyntaxKind, language::GraphQLLanguage};
2use oak_core::{
3    Lexer, LexerCache, LexerState, OakError, TextEdit,
4    lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
5    source::Source,
6};
7use std::sync::LazyLock;
8
/// Shorthand for the `oak_core` lexer state specialised to the GraphQL language,
/// generic over the source type `S` being lexed.
type State<'a, S> = LexerState<'a, S, GraphQLLanguage>;
10
/// Whitespace scanner configuration, with Unicode whitespace enabled.
static GRAPHQL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
/// Comment scanner configuration: `#` starts a line comment and block comments are not nested.
/// NOTE(review): the block markers are empty strings; presumably that disables block-comment
/// scanning (GraphQL has none) — confirm against `CommentConfig::scan`.
static GRAPHQL_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "#", block_start: "", block_end: "", nested_blocks: false });
/// String scanner configuration: `"` is the only quote character and `\` is the escape character.
static GRAPHQL_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
/// Lexer for GraphQL source text.
///
/// A unit struct with no fields; all scanning state lives in the `LexerState`
/// created for each `lex` call.
#[derive(Clone)]
pub struct GraphQLLexer;
17
18impl Lexer<GraphQLLanguage> for GraphQLLexer {
19    fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<GraphQLLanguage>) -> LexOutput<GraphQLLanguage> {
20        let mut state = LexerState::new(text);
21        let result = self.run(&mut state);
22        if result.is_ok() {
23            state.add_eof();
24        }
25        state.finish_with_cache(result, cache)
26    }
27}
28
29impl GraphQLLexer {
30    pub fn new(_config: &GraphQLLanguage) -> Self {
31        Self
32    }
33
34    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
35        while state.not_at_end() {
36            let safe_point = state.get_position();
37
38            if self.skip_whitespace(state) {
39                continue;
40            }
41
42            if self.skip_comment(state) {
43                continue;
44            }
45
46            if self.lex_string_literal(state) {
47                continue;
48            }
49
50            if self.lex_number_literal(state) {
51                continue;
52            }
53
54            if self.lex_identifier_or_keyword(state) {
55                continue;
56            }
57
58            if self.lex_operators(state) {
59                continue;
60            }
61
62            if self.lex_single_char_tokens(state) {
63                continue;
64            }
65
66            state.advance_if_dead_lock(safe_point);
67        }
68
69        Ok(())
70    }
71
72    /// 跳过空白字符
73    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
74        GRAPHQL_WHITESPACE.scan(state, GraphQLSyntaxKind::Whitespace)
75    }
76
77    /// 跳过注释
78    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
79        GRAPHQL_COMMENT.scan(state, GraphQLSyntaxKind::Comment, GraphQLSyntaxKind::Comment)
80    }
81
82    /// 词法分析字符串字面量
83    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
84        // 普通字符串 "..."
85        if GRAPHQL_STRING.scan(state, GraphQLSyntaxKind::StringLiteral) {
86            return true;
87        }
88
89        // 多行字符串 """..."""
90        if state.starts_with("\"\"\"") {
91            let start = state.get_position();
92            state.advance(3); // 跳过开始的 """
93
94            while state.not_at_end() {
95                if state.starts_with("\"\"\"") {
96                    state.advance(3); // 跳过结束的 """
97                    break;
98                }
99                if let Some(ch) = state.peek() {
100                    state.advance(ch.len_utf8());
101                }
102            }
103
104            let end = state.get_position();
105            state.add_token(GraphQLSyntaxKind::StringLiteral, start, end);
106            return true;
107        }
108
109        false
110    }
111
112    /// 词法分析数字字面量
113    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
114        let start = state.get_position();
115        let mut has_digits = false;
116        let mut is_float = false;
117
118        // 处理负号
119        if state.starts_with("-") {
120            state.advance(1);
121        }
122
123        // 处理整数部分
124        if state.starts_with("0") {
125            // 单独的 0
126            state.advance(1);
127            has_digits = true;
128        }
129        else {
130            // 非零开头的数字
131            while let Some(ch) = state.peek() {
132                if ch.is_ascii_digit() {
133                    state.advance(ch.len_utf8());
134                    has_digits = true;
135                }
136                else {
137                    break;
138                }
139            }
140        }
141
142        // 处理小数部分
143        if state.starts_with(".") && has_digits {
144            if let Some(next_ch) = state.peek_next_n(1) {
145                if next_ch.is_ascii_digit() {
146                    state.advance(1); // 跳过 .
147                    is_float = true;
148
149                    while let Some(ch) = state.peek() {
150                        if ch.is_ascii_digit() {
151                            state.advance(ch.len_utf8());
152                        }
153                        else {
154                            break;
155                        }
156                    }
157                }
158            }
159        }
160
161        // 处理指数部分
162        if (state.starts_with("e") || state.starts_with("E")) && has_digits {
163            state.advance(1);
164            is_float = true;
165
166            // 处理指数符号
167            if state.starts_with("+") || state.starts_with("-") {
168                state.advance(1);
169            }
170
171            // 处理指数数字
172            let mut exp_digits = false;
173            while let Some(ch) = state.peek() {
174                if ch.is_ascii_digit() {
175                    state.advance(ch.len_utf8());
176                    exp_digits = true;
177                }
178                else {
179                    break;
180                }
181            }
182            if !exp_digits {
183                return false;
184            }
185        }
186
187        if !has_digits {
188            return false;
189        }
190
191        let kind = if is_float { GraphQLSyntaxKind::FloatLiteral } else { GraphQLSyntaxKind::IntLiteral };
192        state.add_token(kind, start, state.get_position());
193        true
194    }
195
196    /// 词法分析标识符或关键字
197    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
198        let start = state.get_position();
199
200        // 标识符必须以字母或下划线开始
201        if let Some(first_ch) = state.peek() {
202            if !first_ch.is_alphabetic() && first_ch != '_' {
203                return false;
204            }
205
206            state.advance(first_ch.len_utf8());
207
208            // 后续字符可以是字母、数字或下划线
209            while let Some(ch) = state.peek() {
210                if ch.is_alphanumeric() || ch == '_' {
211                    state.advance(ch.len_utf8());
212                }
213                else {
214                    break;
215                }
216            }
217
218            let end = state.get_position();
219            let text = state.get_text_in((start..end).into());
220            let kind = self.keyword_or_identifier(&text);
221            state.add_token(kind, start, end);
222            true
223        }
224        else {
225            false
226        }
227    }
228
229    /// 判断是关键字还是标识符
230    fn keyword_or_identifier(&self, text: &str) -> GraphQLSyntaxKind {
231        match text {
232            // 关键字
233            "query" => GraphQLSyntaxKind::QueryKeyword,
234            "mutation" => GraphQLSyntaxKind::MutationKeyword,
235            "subscription" => GraphQLSyntaxKind::SubscriptionKeyword,
236            "fragment" => GraphQLSyntaxKind::FragmentKeyword,
237            "on" => GraphQLSyntaxKind::OnKeyword,
238            "type" => GraphQLSyntaxKind::TypeKeyword,
239            "interface" => GraphQLSyntaxKind::InterfaceKeyword,
240            "union" => GraphQLSyntaxKind::UnionKeyword,
241            "scalar" => GraphQLSyntaxKind::ScalarKeyword,
242            "enum" => GraphQLSyntaxKind::EnumKeyword,
243            "input" => GraphQLSyntaxKind::InputKeyword,
244            "extend" => GraphQLSyntaxKind::ExtendKeyword,
245            "schema" => GraphQLSyntaxKind::SchemaKeyword,
246            "directive" => GraphQLSyntaxKind::DirectiveKeyword,
247            "implements" => GraphQLSyntaxKind::ImplementsKeyword,
248            "repeats" => GraphQLSyntaxKind::RepeatsKeyword,
249
250            // 特殊字面量
251            "true" | "false" => GraphQLSyntaxKind::BooleanLiteral,
252            "null" => GraphQLSyntaxKind::NullLiteral,
253
254            // 默认为名称
255            _ => GraphQLSyntaxKind::Name,
256        }
257    }
258
259    /// 词法分析操作符
260    fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
261        let start = state.get_position();
262
263        // 三字符操作符
264        if state.starts_with("...") {
265            state.advance(3);
266            state.add_token(GraphQLSyntaxKind::Spread, start, state.get_position());
267            return true;
268        }
269
270        false
271    }
272
273    /// 词法分析单字符 token
274    fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
275        if let Some(ch) = state.peek() {
276            let start = state.get_position();
277            let kind = match ch {
278                '(' => Some(GraphQLSyntaxKind::LeftParen),
279                ')' => Some(GraphQLSyntaxKind::RightParen),
280                '[' => Some(GraphQLSyntaxKind::LeftBracket),
281                ']' => Some(GraphQLSyntaxKind::RightBracket),
282                '{' => Some(GraphQLSyntaxKind::LeftBrace),
283                '}' => Some(GraphQLSyntaxKind::RightBrace),
284                ',' => Some(GraphQLSyntaxKind::Comma),
285                ':' => Some(GraphQLSyntaxKind::Colon),
286                ';' => Some(GraphQLSyntaxKind::Semicolon),
287                '|' => Some(GraphQLSyntaxKind::Pipe),
288                '&' => Some(GraphQLSyntaxKind::Ampersand),
289                '=' => Some(GraphQLSyntaxKind::Equals),
290                '!' => Some(GraphQLSyntaxKind::Exclamation),
291                '@' => Some(GraphQLSyntaxKind::At),
292                '$' => Some(GraphQLSyntaxKind::Dollar),
293                _ => None,
294            };
295
296            if let Some(token_kind) = kind {
297                state.advance(ch.len_utf8());
298                let end = state.get_position();
299                state.add_token(token_kind, start, end);
300                true
301            }
302            else {
303                false
304            }
305        }
306        else {
307            false
308        }
309    }
310}