1use crate::{kind::GraphQLSyntaxKind, language::GraphQLLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError, TextEdit,
4 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
/// Shorthand for the `oak_core` lexer state specialised to [`GraphQLLanguage`];
/// `S` is the source type being lexed.
type State<'a, S> = LexerState<'a, S, GraphQLLanguage>;
10
/// Whitespace scanning rules, with `unicode_whitespace` enabled.
static GRAPHQL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
// NOTE(review): the block markers are empty strings because GraphQL has no block comments;
// how `CommentConfig::scan` treats an empty `block_start` is not visible here — confirm it never matches.
/// Comment scanning rules: `#` is the line-comment marker, block markers are empty and nesting is off.
static GRAPHQL_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "#", block_start: "", block_end: "", nested_blocks: false });
/// Scanning rules for ordinary strings: `"` is the only quote character and `\` is the escape character.
static GRAPHQL_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
/// Lexer for GraphQL source text. It is a field-less unit struct; all scanning
/// state lives in the `LexerState` created per `lex` call.
#[derive(Clone)]
pub struct GraphQLLexer;
17
18impl Lexer<GraphQLLanguage> for GraphQLLexer {
19 fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<GraphQLLanguage>) -> LexOutput<GraphQLLanguage> {
20 let mut state = LexerState::new(text);
21 let result = self.run(&mut state);
22 if result.is_ok() {
23 state.add_eof();
24 }
25 state.finish_with_cache(result, cache)
26 }
27}
28
29impl GraphQLLexer {
30 pub fn new(_config: &GraphQLLanguage) -> Self {
31 Self
32 }
33
34 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
35 while state.not_at_end() {
36 let safe_point = state.get_position();
37
38 if self.skip_whitespace(state) {
39 continue;
40 }
41
42 if self.skip_comment(state) {
43 continue;
44 }
45
46 if self.lex_string_literal(state) {
47 continue;
48 }
49
50 if self.lex_number_literal(state) {
51 continue;
52 }
53
54 if self.lex_identifier_or_keyword(state) {
55 continue;
56 }
57
58 if self.lex_operators(state) {
59 continue;
60 }
61
62 if self.lex_single_char_tokens(state) {
63 continue;
64 }
65
66 state.advance_if_dead_lock(safe_point);
67 }
68
69 Ok(())
70 }
71
72 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
74 GRAPHQL_WHITESPACE.scan(state, GraphQLSyntaxKind::Whitespace)
75 }
76
77 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
79 GRAPHQL_COMMENT.scan(state, GraphQLSyntaxKind::Comment, GraphQLSyntaxKind::Comment)
80 }
81
82 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
84 if GRAPHQL_STRING.scan(state, GraphQLSyntaxKind::StringLiteral) {
86 return true;
87 }
88
89 if state.starts_with("\"\"\"") {
91 let start = state.get_position();
92 state.advance(3); while state.not_at_end() {
95 if state.starts_with("\"\"\"") {
96 state.advance(3); break;
98 }
99 if let Some(ch) = state.peek() {
100 state.advance(ch.len_utf8());
101 }
102 }
103
104 let end = state.get_position();
105 state.add_token(GraphQLSyntaxKind::StringLiteral, start, end);
106 return true;
107 }
108
109 false
110 }
111
112 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
114 let start = state.get_position();
115 let mut has_digits = false;
116 let mut is_float = false;
117
118 if state.starts_with("-") {
120 state.advance(1);
121 }
122
123 if state.starts_with("0") {
125 state.advance(1);
127 has_digits = true;
128 }
129 else {
130 while let Some(ch) = state.peek() {
132 if ch.is_ascii_digit() {
133 state.advance(ch.len_utf8());
134 has_digits = true;
135 }
136 else {
137 break;
138 }
139 }
140 }
141
142 if state.starts_with(".") && has_digits {
144 if let Some(next_ch) = state.peek_next_n(1) {
145 if next_ch.is_ascii_digit() {
146 state.advance(1); is_float = true;
148
149 while let Some(ch) = state.peek() {
150 if ch.is_ascii_digit() {
151 state.advance(ch.len_utf8());
152 }
153 else {
154 break;
155 }
156 }
157 }
158 }
159 }
160
161 if (state.starts_with("e") || state.starts_with("E")) && has_digits {
163 state.advance(1);
164 is_float = true;
165
166 if state.starts_with("+") || state.starts_with("-") {
168 state.advance(1);
169 }
170
171 let mut exp_digits = false;
173 while let Some(ch) = state.peek() {
174 if ch.is_ascii_digit() {
175 state.advance(ch.len_utf8());
176 exp_digits = true;
177 }
178 else {
179 break;
180 }
181 }
182 if !exp_digits {
183 return false;
184 }
185 }
186
187 if !has_digits {
188 return false;
189 }
190
191 let kind = if is_float { GraphQLSyntaxKind::FloatLiteral } else { GraphQLSyntaxKind::IntLiteral };
192 state.add_token(kind, start, state.get_position());
193 true
194 }
195
196 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
198 let start = state.get_position();
199
200 if let Some(first_ch) = state.peek() {
202 if !first_ch.is_alphabetic() && first_ch != '_' {
203 return false;
204 }
205
206 state.advance(first_ch.len_utf8());
207
208 while let Some(ch) = state.peek() {
210 if ch.is_alphanumeric() || ch == '_' {
211 state.advance(ch.len_utf8());
212 }
213 else {
214 break;
215 }
216 }
217
218 let end = state.get_position();
219 let text = state.get_text_in((start..end).into());
220 let kind = self.keyword_or_identifier(&text);
221 state.add_token(kind, start, end);
222 true
223 }
224 else {
225 false
226 }
227 }
228
229 fn keyword_or_identifier(&self, text: &str) -> GraphQLSyntaxKind {
231 match text {
232 "query" => GraphQLSyntaxKind::QueryKeyword,
234 "mutation" => GraphQLSyntaxKind::MutationKeyword,
235 "subscription" => GraphQLSyntaxKind::SubscriptionKeyword,
236 "fragment" => GraphQLSyntaxKind::FragmentKeyword,
237 "on" => GraphQLSyntaxKind::OnKeyword,
238 "type" => GraphQLSyntaxKind::TypeKeyword,
239 "interface" => GraphQLSyntaxKind::InterfaceKeyword,
240 "union" => GraphQLSyntaxKind::UnionKeyword,
241 "scalar" => GraphQLSyntaxKind::ScalarKeyword,
242 "enum" => GraphQLSyntaxKind::EnumKeyword,
243 "input" => GraphQLSyntaxKind::InputKeyword,
244 "extend" => GraphQLSyntaxKind::ExtendKeyword,
245 "schema" => GraphQLSyntaxKind::SchemaKeyword,
246 "directive" => GraphQLSyntaxKind::DirectiveKeyword,
247 "implements" => GraphQLSyntaxKind::ImplementsKeyword,
248 "repeats" => GraphQLSyntaxKind::RepeatsKeyword,
249
250 "true" | "false" => GraphQLSyntaxKind::BooleanLiteral,
252 "null" => GraphQLSyntaxKind::NullLiteral,
253
254 _ => GraphQLSyntaxKind::Name,
256 }
257 }
258
259 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
261 let start = state.get_position();
262
263 if state.starts_with("...") {
265 state.advance(3);
266 state.add_token(GraphQLSyntaxKind::Spread, start, state.get_position());
267 return true;
268 }
269
270 false
271 }
272
273 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
275 if let Some(ch) = state.peek() {
276 let start = state.get_position();
277 let kind = match ch {
278 '(' => Some(GraphQLSyntaxKind::LeftParen),
279 ')' => Some(GraphQLSyntaxKind::RightParen),
280 '[' => Some(GraphQLSyntaxKind::LeftBracket),
281 ']' => Some(GraphQLSyntaxKind::RightBracket),
282 '{' => Some(GraphQLSyntaxKind::LeftBrace),
283 '}' => Some(GraphQLSyntaxKind::RightBrace),
284 ',' => Some(GraphQLSyntaxKind::Comma),
285 ':' => Some(GraphQLSyntaxKind::Colon),
286 ';' => Some(GraphQLSyntaxKind::Semicolon),
287 '|' => Some(GraphQLSyntaxKind::Pipe),
288 '&' => Some(GraphQLSyntaxKind::Ampersand),
289 '=' => Some(GraphQLSyntaxKind::Equals),
290 '!' => Some(GraphQLSyntaxKind::Exclamation),
291 '@' => Some(GraphQLSyntaxKind::At),
292 '$' => Some(GraphQLSyntaxKind::Dollar),
293 _ => None,
294 };
295
296 if let Some(token_kind) = kind {
297 state.advance(ch.len_utf8());
298 let end = state.get_position();
299 state.add_token(token_kind, start, end);
300 true
301 }
302 else {
303 false
304 }
305 }
306 else {
307 false
308 }
309 }
310}