Skip to main content

oak_graphql/lexer/
mod.rs

1#![doc = include_str!("readme.md")]
2/// Token types for GraphQL.
3pub mod token_type;
4
5use crate::{language::GraphQLLanguage, lexer::token_type::GraphQLTokenType};
6use oak_core::{
7    Lexer, LexerCache, LexerState, OakError, TextEdit,
8    lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
9    source::Source,
10};
11use std::sync::LazyLock;
12
13pub(crate) type State<'a, S> = LexerState<'a, S, GraphQLLanguage>;
14
15static GRAPHQL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
16static GRAPHQL_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "#", block_start: "", block_end: "", nested_blocks: false });
17static GRAPHQL_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18
19/// A lexer for GraphQL source files.
20#[derive(Clone, Debug)]
21pub struct GraphQLLexer<'config> {
22    config: &'config GraphQLLanguage,
23}
24
25impl<'config> Lexer<GraphQLLanguage> for GraphQLLexer<'config> {
26    fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<GraphQLLanguage>) -> LexOutput<GraphQLLanguage> {
27        let mut state = LexerState::new(text);
28        let result = self.run(&mut state);
29        if result.is_ok() {
30            state.add_eof();
31        }
32        state.finish_with_cache(result, cache)
33    }
34}
35
36impl<'config> GraphQLLexer<'config> {
37    /// Creates a new GraphQL lexer.
38    pub fn new(config: &'config GraphQLLanguage) -> Self {
39        Self { config }
40    }
41
42    fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
43        while state.not_at_end() {
44            let safe_point = state.get_position();
45
46            if self.skip_whitespace(state) {
47                continue;
48            }
49
50            if self.skip_comment(state) {
51                continue;
52            }
53
54            if self.lex_string_literal(state) {
55                continue;
56            }
57
58            if self.lex_number_literal(state) {
59                continue;
60            }
61
62            if self.lex_identifier_or_keyword(state) {
63                continue;
64            }
65
66            if self.lex_operators(state) {
67                continue;
68            }
69
70            if self.lex_single_char_tokens(state) {
71                continue;
72            }
73
74            state.advance_if_dead_lock(safe_point);
75        }
76
77        Ok(())
78    }
79
80    /// Skips whitespace characters.
81    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
82        GRAPHQL_WHITESPACE.scan(state, GraphQLTokenType::Whitespace)
83    }
84
85    /// Skips comments.
86    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
87        GRAPHQL_COMMENT.scan(state, GraphQLTokenType::Comment, GraphQLTokenType::Comment)
88    }
89
90    /// Lexes string literals.
91    fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
92        // Normal string "..."
93        if GRAPHQL_STRING.scan(state, GraphQLTokenType::StringLiteral) {
94            return true;
95        }
96
97        // Multiline string """..."""
98        if state.starts_with("\"\"\"") {
99            let start = state.get_position();
100            state.advance(3); // Skip opening """
101
102            while state.not_at_end() {
103                if state.starts_with("\"\"\"") {
104                    state.advance(3); // Skip closing """
105                    break;
106                }
107                if let Some(ch) = state.peek() {
108                    state.advance(ch.len_utf8());
109                }
110            }
111
112            let end = state.get_position();
113            state.add_token(GraphQLTokenType::StringLiteral, start, end);
114            return true;
115        }
116
117        false
118    }
119
120    /// Lexes number literals.
121    fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
122        let start = state.get_position();
123        let mut has_digits = false;
124        let mut is_float = false;
125
126        // Handle negative sign
127        if state.starts_with("-") {
128            state.advance(1);
129        }
130
131        // Handle integer part
132        if state.starts_with("0") {
133            // Single zero
134            state.advance(1);
135            has_digits = true;
136        }
137        else {
138            // Digits not starting with zero
139            while let Some(ch) = state.peek() {
140                if ch.is_ascii_digit() {
141                    state.advance(ch.len_utf8());
142                    has_digits = true;
143                }
144                else {
145                    break;
146                }
147            }
148        }
149
150        // Handle fractional part
151        if state.starts_with(".") && has_digits {
152            if let Some(next_ch) = state.peek_next_n(1) {
153                if next_ch.is_ascii_digit() {
154                    state.advance(1); // Skip .
155                    is_float = true;
156
157                    while let Some(ch) = state.peek() {
158                        if ch.is_ascii_digit() {
159                            state.advance(ch.len_utf8());
160                        }
161                        else {
162                            break;
163                        }
164                    }
165                }
166            }
167        }
168
169        // Handle exponent part
170        if (state.starts_with("e") || state.starts_with("E")) && has_digits {
171            state.advance(1);
172            is_float = true;
173
174            // Handle exponent sign
175            if state.starts_with("+") || state.starts_with("-") {
176                state.advance(1);
177            }
178
179            // Handle exponent digits
180            let mut exp_digits = false;
181            while let Some(ch) = state.peek() {
182                if ch.is_ascii_digit() {
183                    state.advance(ch.len_utf8());
184                    exp_digits = true;
185                }
186                else {
187                    break;
188                }
189            }
190            if !exp_digits {
191                return false;
192            }
193        }
194
195        if !has_digits {
196            return false;
197        }
198
199        let kind = if is_float { GraphQLTokenType::FloatLiteral } else { GraphQLTokenType::IntLiteral };
200        state.add_token(kind, start, state.get_position());
201        true
202    }
203
204    /// Lexes identifiers or keywords.
205    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
206        let start = state.get_position();
207
208        // Identifier must start with a letter or underscore
209        if let Some(first_ch) = state.peek() {
210            if !first_ch.is_alphabetic() && first_ch != '_' {
211                return false;
212            }
213
214            state.advance(first_ch.len_utf8());
215
216            // Subsequent characters can be alphanumeric or underscore
217            while let Some(ch) = state.peek() {
218                if ch.is_alphanumeric() || ch == '_' {
219                    state.advance(ch.len_utf8());
220                }
221                else {
222                    break;
223                }
224            }
225
226            let end = state.get_position();
227            let text = state.get_text_in((start..end).into());
228            let kind = self.keyword_or_identifier(&text);
229            state.add_token(kind, start, end);
230            true
231        }
232        else {
233            false
234        }
235    }
236
237    /// Determines if the text is a keyword or identifier.
238    fn keyword_or_identifier(&self, text: &str) -> GraphQLTokenType {
239        match text {
240            // Keywords
241            "query" => GraphQLTokenType::QueryKeyword,
242            "mutation" => GraphQLTokenType::MutationKeyword,
243            "subscription" => GraphQLTokenType::SubscriptionKeyword,
244            "fragment" => GraphQLTokenType::FragmentKeyword,
245            "on" => GraphQLTokenType::OnKeyword,
246            "type" => GraphQLTokenType::TypeKeyword,
247            "interface" => GraphQLTokenType::InterfaceKeyword,
248            "union" => GraphQLTokenType::UnionKeyword,
249            "scalar" => GraphQLTokenType::ScalarKeyword,
250            "enum" => GraphQLTokenType::EnumKeyword,
251            "input" => GraphQLTokenType::InputKeyword,
252            "extend" => GraphQLTokenType::ExtendKeyword,
253            "schema" => GraphQLTokenType::SchemaKeyword,
254            "directive" => GraphQLTokenType::DirectiveKeyword,
255            "implements" => GraphQLTokenType::ImplementsKeyword,
256            "repeats" => GraphQLTokenType::RepeatsKeyword,
257
258            // Special literals
259            "true" | "false" => GraphQLTokenType::BooleanLiteral,
260            "null" => GraphQLTokenType::NullLiteral,
261
262            // Defaults to Name
263            _ => GraphQLTokenType::Name,
264        }
265    }
266
267    /// Lexes operators.
268    fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
269        let start = state.get_position();
270
271        // Triple-character operators
272        if state.starts_with("...") {
273            state.advance(3);
274            state.add_token(GraphQLTokenType::Spread, start, state.get_position());
275            return true;
276        }
277
278        false
279    }
280
281    /// Lexes single-character tokens.
282    fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
283        if let Some(ch) = state.peek() {
284            let start = state.get_position();
285            let kind = match ch {
286                '(' => Some(GraphQLTokenType::LeftParen),
287                ')' => Some(GraphQLTokenType::RightParen),
288                '[' => Some(GraphQLTokenType::LeftBracket),
289                ']' => Some(GraphQLTokenType::RightBracket),
290                '{' => Some(GraphQLTokenType::LeftBrace),
291                '}' => Some(GraphQLTokenType::RightBrace),
292                ',' => Some(GraphQLTokenType::Comma),
293                ':' => Some(GraphQLTokenType::Colon),
294                ';' => Some(GraphQLTokenType::Semicolon),
295                '|' => Some(GraphQLTokenType::Pipe),
296                '&' => Some(GraphQLTokenType::Ampersand),
297                '=' => Some(GraphQLTokenType::Equals),
298                '!' => Some(GraphQLTokenType::Exclamation),
299                '@' => Some(GraphQLTokenType::At),
300                '$' => Some(GraphQLTokenType::Dollar),
301                _ => None,
302            };
303
304            if let Some(token_kind) = kind {
305                state.advance(ch.len_utf8());
306                let end = state.get_position();
307                state.add_token(token_kind, start, end);
308                true
309            }
310            else {
311                false
312            }
313        }
314        else {
315            false
316        }
317    }
318}