1use crate::{kind::GraphQLSyntaxKind, language::GraphQLLanguage};
2use oak_core::{
3 IncrementalCache, Lexer, LexerState, OakError,
4 lexer::{CommentLine, LexOutput, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<S> = LexerState<S, GraphQLLanguage>;
10
11static GRAPHQL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static GRAPHQL_COMMENT: LazyLock<CommentLine> = LazyLock::new(|| CommentLine { line_markers: &["#"] });
13static GRAPHQL_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
/// Lexer for GraphQL source text.
///
/// The lexer borrows a [`GraphQLLanguage`] for the lifetime `'config`; it owns no other
/// data, so cloning it only copies that reference.
#[derive(Clone)]
pub struct GraphQLLexer<'config> {
    /// Language configuration handed to [`GraphQLLexer::new`].
    config: &'config GraphQLLanguage,
}
19
20impl<'config> Lexer<GraphQLLanguage> for GraphQLLexer<'config> {
21 fn lex_incremental(
22 &self,
23 source: impl Source,
24 changed: usize,
25 cache: IncrementalCache<GraphQLLanguage>,
26 ) -> LexOutput<GraphQLLanguage> {
27 let mut state = LexerState::new_with_cache(source, changed, cache);
28 let result = self.run(&mut state);
29 state.finish(result)
30 }
31}
32
33impl<'config> GraphQLLexer<'config> {
34 pub fn new(config: &'config GraphQLLanguage) -> Self {
35 Self { config }
36 }
37
38 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
39 while state.not_at_end() {
40 let safe_point = state.get_position();
41
42 if self.skip_whitespace(state) {
43 continue;
44 }
45
46 if self.skip_comment(state) {
47 continue;
48 }
49
50 if self.lex_string_literal(state) {
51 continue;
52 }
53
54 if self.lex_number_literal(state) {
55 continue;
56 }
57
58 if self.lex_identifier_or_keyword(state) {
59 continue;
60 }
61
62 if self.lex_operators(state) {
63 continue;
64 }
65
66 if self.lex_single_char_tokens(state) {
67 continue;
68 }
69
70 state.safe_check(safe_point);
71 }
72
73 let eof_pos = state.get_position();
75 state.add_token(GraphQLSyntaxKind::Eof, eof_pos, eof_pos);
76 Ok(())
77 }
78
79 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
81 match GRAPHQL_WHITESPACE.scan(state.rest(), state.get_position(), GraphQLSyntaxKind::Whitespace) {
82 Some(token) => {
83 state.advance_with(token);
84 true
85 }
86 None => false,
87 }
88 }
89
90 fn skip_comment<S: Source>(&self, state: &mut State<S>) -> bool {
92 match GRAPHQL_COMMENT.scan(state.rest(), state.get_position(), GraphQLSyntaxKind::Comment) {
93 Some(token) => {
94 state.advance_with(token);
95 true
96 }
97 None => false,
98 }
99 }
100
101 fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
103 if let Some(token) = GRAPHQL_STRING.scan(state.rest(), state.get_position(), GraphQLSyntaxKind::StringLiteral) {
105 state.advance_with(token);
106 return true;
107 }
108
109 if state.rest().starts_with("\"\"\"") {
111 let start = state.get_position();
112 state.advance(3); while state.not_at_end() {
115 if state.rest().starts_with("\"\"\"") {
116 state.advance(3); break;
118 }
119 if let Some(ch) = state.peek() {
120 state.advance(ch.len_utf8());
121 }
122 }
123
124 let end = state.get_position();
125 state.add_token(GraphQLSyntaxKind::StringLiteral, start, end);
126 return true;
127 }
128
129 false
130 }
131
132 fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
134 let start = state.get_position();
135 let mut has_digits = false;
136 let mut is_float = false;
137
138 if state.rest().starts_with('-') {
140 state.advance(1);
141 }
142
143 if state.rest().starts_with('0') {
145 state.advance(1);
147 has_digits = true;
148 }
149 else {
150 while let Some(ch) = state.peek() {
152 if ch.is_ascii_digit() {
153 state.advance(ch.len_utf8());
154 has_digits = true;
155 }
156 else {
157 break;
158 }
159 }
160 }
161
162 if state.rest().starts_with('.') && has_digits {
164 if let Some(next_ch) = state.rest().chars().nth(1) {
165 if next_ch.is_ascii_digit() {
166 state.advance(1); is_float = true;
168
169 while let Some(ch) = state.peek() {
170 if ch.is_ascii_digit() {
171 state.advance(ch.len_utf8());
172 }
173 else {
174 break;
175 }
176 }
177 }
178 }
179 }
180
181 if (state.rest().starts_with('e') || state.rest().starts_with('E')) && has_digits {
183 state.advance(1);
184 is_float = true;
185
186 if state.rest().starts_with('+') || state.rest().starts_with('-') {
188 state.advance(1);
189 }
190
191 let mut exp_digits = false;
193 while let Some(ch) = state.peek() {
194 if ch.is_ascii_digit() {
195 state.advance(ch.len_utf8());
196 exp_digits = true;
197 }
198 else {
199 break;
200 }
201 }
202
203 if !exp_digits {
204 return false;
206 }
207 }
208
209 if has_digits {
210 let end = state.get_position();
211 let kind = if is_float { GraphQLSyntaxKind::FloatLiteral } else { GraphQLSyntaxKind::IntLiteral };
212 state.add_token(kind, start, end);
213 true
214 }
215 else {
216 false
217 }
218 }
219
220 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
222 let start = state.get_position();
223
224 if let Some(first_ch) = state.peek() {
226 if !first_ch.is_alphabetic() && first_ch != '_' {
227 return false;
228 }
229
230 state.advance(first_ch.len_utf8());
231
232 while let Some(ch) = state.peek() {
234 if ch.is_alphanumeric() || ch == '_' {
235 state.advance(ch.len_utf8());
236 }
237 else {
238 break;
239 }
240 }
241
242 let end = state.get_position();
243 let text = state.get_text_in((start..end).into());
244 let kind = self.keyword_or_identifier(&text);
245 state.add_token(kind, start, end);
246 true
247 }
248 else {
249 false
250 }
251 }
252
253 fn keyword_or_identifier(&self, text: &str) -> GraphQLSyntaxKind {
255 match text {
256 "query" => GraphQLSyntaxKind::QueryKeyword,
258 "mutation" => GraphQLSyntaxKind::MutationKeyword,
259 "subscription" => GraphQLSyntaxKind::SubscriptionKeyword,
260 "fragment" => GraphQLSyntaxKind::FragmentKeyword,
261 "on" => GraphQLSyntaxKind::OnKeyword,
262 "type" => GraphQLSyntaxKind::TypeKeyword,
263 "interface" => GraphQLSyntaxKind::InterfaceKeyword,
264 "union" => GraphQLSyntaxKind::UnionKeyword,
265 "scalar" => GraphQLSyntaxKind::ScalarKeyword,
266 "enum" => GraphQLSyntaxKind::EnumKeyword,
267 "input" => GraphQLSyntaxKind::InputKeyword,
268 "extend" => GraphQLSyntaxKind::ExtendKeyword,
269 "schema" => GraphQLSyntaxKind::SchemaKeyword,
270 "directive" => GraphQLSyntaxKind::DirectiveKeyword,
271 "implements" => GraphQLSyntaxKind::ImplementsKeyword,
272 "repeats" => GraphQLSyntaxKind::RepeatsKeyword,
273
274 "true" | "false" => GraphQLSyntaxKind::BooleanLiteral,
276 "null" => GraphQLSyntaxKind::NullLiteral,
277
278 _ => GraphQLSyntaxKind::Name,
280 }
281 }
282
283 fn lex_operators<S: Source>(&self, state: &mut State<S>) -> bool {
285 let start = state.get_position();
286 let rest = state.rest();
287
288 if rest.starts_with("...") {
290 state.advance(3);
291 state.add_token(GraphQLSyntaxKind::Spread, start, state.get_position());
292 return true;
293 }
294
295 false
296 }
297
298 fn lex_single_char_tokens<S: Source>(&self, state: &mut State<S>) -> bool {
300 if let Some(ch) = state.peek() {
301 let start = state.get_position();
302 let kind = match ch {
303 '(' => Some(GraphQLSyntaxKind::LeftParen),
304 ')' => Some(GraphQLSyntaxKind::RightParen),
305 '[' => Some(GraphQLSyntaxKind::LeftBracket),
306 ']' => Some(GraphQLSyntaxKind::RightBracket),
307 '{' => Some(GraphQLSyntaxKind::LeftBrace),
308 '}' => Some(GraphQLSyntaxKind::RightBrace),
309 ',' => Some(GraphQLSyntaxKind::Comma),
310 ':' => Some(GraphQLSyntaxKind::Colon),
311 ';' => Some(GraphQLSyntaxKind::Semicolon),
312 '|' => Some(GraphQLSyntaxKind::Pipe),
313 '&' => Some(GraphQLSyntaxKind::Ampersand),
314 '=' => Some(GraphQLSyntaxKind::Equals),
315 '!' => Some(GraphQLSyntaxKind::Exclamation),
316 '@' => Some(GraphQLSyntaxKind::At),
317 '$' => Some(GraphQLSyntaxKind::Dollar),
318 _ => None,
319 };
320
321 if let Some(token_kind) = kind {
322 state.advance(ch.len_utf8());
323 let end = state.get_position();
324 state.add_token(token_kind, start, end);
325 true
326 }
327 else {
328 false
329 }
330 }
331 else {
332 false
333 }
334 }
335}