oak_graphql/lexer/
mod.rs

1use crate::{kind::GraphQLSyntaxKind, language::GraphQLLanguage};
2use oak_core::{
3    IncrementalCache, Lexer, LexerState, OakError,
4    lexer::{CommentLine, LexOutput, StringConfig, WhitespaceConfig},
5    source::Source,
6};
7use std::sync::LazyLock;
8
9type State<S> = LexerState<S, GraphQLLanguage>;
10
11static GRAPHQL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static GRAPHQL_COMMENT: LazyLock<CommentLine> = LazyLock::new(|| CommentLine { line_markers: &["#"] });
13static GRAPHQL_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
15#[derive(Clone)]
16pub struct GraphQLLexer<'config> {
17    config: &'config GraphQLLanguage,
18}
19
20impl<'config> Lexer<GraphQLLanguage> for GraphQLLexer<'config> {
21    fn lex_incremental(
22        &self,
23        source: impl Source,
24        changed: usize,
25        cache: IncrementalCache<GraphQLLanguage>,
26    ) -> LexOutput<GraphQLLanguage> {
27        let mut state = LexerState::new_with_cache(source, changed, cache);
28        let result = self.run(&mut state);
29        state.finish(result)
30    }
31}
32
33impl<'config> GraphQLLexer<'config> {
34    pub fn new(config: &'config GraphQLLanguage) -> Self {
35        Self { config }
36    }
37
38    fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
39        while state.not_at_end() {
40            let safe_point = state.get_position();
41
42            if self.skip_whitespace(state) {
43                continue;
44            }
45
46            if self.skip_comment(state) {
47                continue;
48            }
49
50            if self.lex_string_literal(state) {
51                continue;
52            }
53
54            if self.lex_number_literal(state) {
55                continue;
56            }
57
58            if self.lex_identifier_or_keyword(state) {
59                continue;
60            }
61
62            if self.lex_operators(state) {
63                continue;
64            }
65
66            if self.lex_single_char_tokens(state) {
67                continue;
68            }
69
70            state.safe_check(safe_point);
71        }
72
73        // 添加 EOF token
74        let eof_pos = state.get_position();
75        state.add_token(GraphQLSyntaxKind::Eof, eof_pos, eof_pos);
76        Ok(())
77    }
78
79    /// 跳过空白字符
80    fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
81        match GRAPHQL_WHITESPACE.scan(state.rest(), state.get_position(), GraphQLSyntaxKind::Whitespace) {
82            Some(token) => {
83                state.advance_with(token);
84                true
85            }
86            None => false,
87        }
88    }
89
90    /// 跳过注释
91    fn skip_comment<S: Source>(&self, state: &mut State<S>) -> bool {
92        match GRAPHQL_COMMENT.scan(state.rest(), state.get_position(), GraphQLSyntaxKind::Comment) {
93            Some(token) => {
94                state.advance_with(token);
95                true
96            }
97            None => false,
98        }
99    }
100
101    /// 词法分析字符串字面量
102    fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
103        // 普通字符串 "..."
104        if let Some(token) = GRAPHQL_STRING.scan(state.rest(), state.get_position(), GraphQLSyntaxKind::StringLiteral) {
105            state.advance_with(token);
106            return true;
107        }
108
109        // 多行字符串 """..."""
110        if state.rest().starts_with("\"\"\"") {
111            let start = state.get_position();
112            state.advance(3); // 跳过开始的 """
113
114            while state.not_at_end() {
115                if state.rest().starts_with("\"\"\"") {
116                    state.advance(3); // 跳过结束的 """
117                    break;
118                }
119                if let Some(ch) = state.peek() {
120                    state.advance(ch.len_utf8());
121                }
122            }
123
124            let end = state.get_position();
125            state.add_token(GraphQLSyntaxKind::StringLiteral, start, end);
126            return true;
127        }
128
129        false
130    }
131
132    /// 词法分析数字字面量
133    fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
134        let start = state.get_position();
135        let mut has_digits = false;
136        let mut is_float = false;
137
138        // 处理负号
139        if state.rest().starts_with('-') {
140            state.advance(1);
141        }
142
143        // 处理整数部分
144        if state.rest().starts_with('0') {
145            // 单独的 0
146            state.advance(1);
147            has_digits = true;
148        }
149        else {
150            // 非零开头的数字
151            while let Some(ch) = state.peek() {
152                if ch.is_ascii_digit() {
153                    state.advance(ch.len_utf8());
154                    has_digits = true;
155                }
156                else {
157                    break;
158                }
159            }
160        }
161
162        // 处理小数部分
163        if state.rest().starts_with('.') && has_digits {
164            if let Some(next_ch) = state.rest().chars().nth(1) {
165                if next_ch.is_ascii_digit() {
166                    state.advance(1); // 跳过 .
167                    is_float = true;
168
169                    while let Some(ch) = state.peek() {
170                        if ch.is_ascii_digit() {
171                            state.advance(ch.len_utf8());
172                        }
173                        else {
174                            break;
175                        }
176                    }
177                }
178            }
179        }
180
181        // 处理指数部分
182        if (state.rest().starts_with('e') || state.rest().starts_with('E')) && has_digits {
183            state.advance(1);
184            is_float = true;
185
186            // 处理指数符号
187            if state.rest().starts_with('+') || state.rest().starts_with('-') {
188                state.advance(1);
189            }
190
191            // 处理指数数字
192            let mut exp_digits = false;
193            while let Some(ch) = state.peek() {
194                if ch.is_ascii_digit() {
195                    state.advance(ch.len_utf8());
196                    exp_digits = true;
197                }
198                else {
199                    break;
200                }
201            }
202
203            if !exp_digits {
204                // 指数部分必须有数字
205                return false;
206            }
207        }
208
209        if has_digits {
210            let end = state.get_position();
211            let kind = if is_float { GraphQLSyntaxKind::FloatLiteral } else { GraphQLSyntaxKind::IntLiteral };
212            state.add_token(kind, start, end);
213            true
214        }
215        else {
216            false
217        }
218    }
219
220    /// 词法分析标识符或关键字
221    fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
222        let start = state.get_position();
223
224        // 标识符必须以字母或下划线开始
225        if let Some(first_ch) = state.peek() {
226            if !first_ch.is_alphabetic() && first_ch != '_' {
227                return false;
228            }
229
230            state.advance(first_ch.len_utf8());
231
232            // 后续字符可以是字母、数字或下划线
233            while let Some(ch) = state.peek() {
234                if ch.is_alphanumeric() || ch == '_' {
235                    state.advance(ch.len_utf8());
236                }
237                else {
238                    break;
239                }
240            }
241
242            let end = state.get_position();
243            let text = state.get_text_in((start..end).into());
244            let kind = self.keyword_or_identifier(&text);
245            state.add_token(kind, start, end);
246            true
247        }
248        else {
249            false
250        }
251    }
252
253    /// 判断是关键字还是标识符
254    fn keyword_or_identifier(&self, text: &str) -> GraphQLSyntaxKind {
255        match text {
256            // 关键字
257            "query" => GraphQLSyntaxKind::QueryKeyword,
258            "mutation" => GraphQLSyntaxKind::MutationKeyword,
259            "subscription" => GraphQLSyntaxKind::SubscriptionKeyword,
260            "fragment" => GraphQLSyntaxKind::FragmentKeyword,
261            "on" => GraphQLSyntaxKind::OnKeyword,
262            "type" => GraphQLSyntaxKind::TypeKeyword,
263            "interface" => GraphQLSyntaxKind::InterfaceKeyword,
264            "union" => GraphQLSyntaxKind::UnionKeyword,
265            "scalar" => GraphQLSyntaxKind::ScalarKeyword,
266            "enum" => GraphQLSyntaxKind::EnumKeyword,
267            "input" => GraphQLSyntaxKind::InputKeyword,
268            "extend" => GraphQLSyntaxKind::ExtendKeyword,
269            "schema" => GraphQLSyntaxKind::SchemaKeyword,
270            "directive" => GraphQLSyntaxKind::DirectiveKeyword,
271            "implements" => GraphQLSyntaxKind::ImplementsKeyword,
272            "repeats" => GraphQLSyntaxKind::RepeatsKeyword,
273
274            // 特殊字面量
275            "true" | "false" => GraphQLSyntaxKind::BooleanLiteral,
276            "null" => GraphQLSyntaxKind::NullLiteral,
277
278            // 默认为名称
279            _ => GraphQLSyntaxKind::Name,
280        }
281    }
282
283    /// 词法分析操作符
284    fn lex_operators<S: Source>(&self, state: &mut State<S>) -> bool {
285        let start = state.get_position();
286        let rest = state.rest();
287
288        // 三字符操作符
289        if rest.starts_with("...") {
290            state.advance(3);
291            state.add_token(GraphQLSyntaxKind::Spread, start, state.get_position());
292            return true;
293        }
294
295        false
296    }
297
298    /// 词法分析单字符 token
299    fn lex_single_char_tokens<S: Source>(&self, state: &mut State<S>) -> bool {
300        if let Some(ch) = state.peek() {
301            let start = state.get_position();
302            let kind = match ch {
303                '(' => Some(GraphQLSyntaxKind::LeftParen),
304                ')' => Some(GraphQLSyntaxKind::RightParen),
305                '[' => Some(GraphQLSyntaxKind::LeftBracket),
306                ']' => Some(GraphQLSyntaxKind::RightBracket),
307                '{' => Some(GraphQLSyntaxKind::LeftBrace),
308                '}' => Some(GraphQLSyntaxKind::RightBrace),
309                ',' => Some(GraphQLSyntaxKind::Comma),
310                ':' => Some(GraphQLSyntaxKind::Colon),
311                ';' => Some(GraphQLSyntaxKind::Semicolon),
312                '|' => Some(GraphQLSyntaxKind::Pipe),
313                '&' => Some(GraphQLSyntaxKind::Ampersand),
314                '=' => Some(GraphQLSyntaxKind::Equals),
315                '!' => Some(GraphQLSyntaxKind::Exclamation),
316                '@' => Some(GraphQLSyntaxKind::At),
317                '$' => Some(GraphQLSyntaxKind::Dollar),
318                _ => None,
319            };
320
321            if let Some(token_kind) = kind {
322                state.advance(ch.len_utf8());
323                let end = state.get_position();
324                state.add_token(token_kind, start, end);
325                true
326            }
327            else {
328                false
329            }
330        }
331        else {
332            false
333        }
334    }
335}