1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::GraphQLLanguage, lexer::token_type::GraphQLTokenType};
6use oak_core::{
7 Lexer, LexerCache, LexerState, OakError, TextEdit,
8 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
9 source::Source,
10};
11use std::sync::LazyLock;
12
/// Lexer state specialized to GraphQL, generic over the source text type.
pub(crate) type State<'a, S> = LexerState<'a, S, GraphQLLanguage>;

// Shared scanner configurations, built lazily on first use.

/// GraphQL treats any Unicode whitespace as ignored token separators.
static GRAPHQL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
/// GraphQL has only `#` line comments; there is no block-comment syntax.
// NOTE(review): empty `block_start`/`block_end` are presumably "block comments
// disabled" — confirm CommentConfig does not treat "" as a zero-width match.
static GRAPHQL_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "#", block_start: "", block_end: "", nested_blocks: false });
/// Ordinary `"..."` string literals with backslash escapes. Block strings
/// (`"""`) are handled separately in `lex_string_literal`.
static GRAPHQL_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18
/// Hand-written lexer for GraphQL, borrowing the language configuration
/// for its lifetime.
#[derive(Clone, Debug)]
pub struct GraphQLLexer<'config> {
    // Language configuration. None of the scanning routines in this file
    // read it yet; it is kept for constructor parity and future use.
    config: &'config GraphQLLanguage,
}
24
25impl<'config> Lexer<GraphQLLanguage> for GraphQLLexer<'config> {
26 fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<GraphQLLanguage>) -> LexOutput<GraphQLLanguage> {
27 let mut state = LexerState::new(text);
28 let result = self.run(&mut state);
29 if result.is_ok() {
30 state.add_eof();
31 }
32 state.finish_with_cache(result, cache)
33 }
34}
35
36impl<'config> GraphQLLexer<'config> {
    /// Creates a lexer that borrows the given language configuration.
    pub fn new(config: &'config GraphQLLanguage) -> Self {
        Self { config }
    }
41
42 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
43 while state.not_at_end() {
44 let safe_point = state.get_position();
45
46 if self.skip_whitespace(state) {
47 continue;
48 }
49
50 if self.skip_comment(state) {
51 continue;
52 }
53
54 if self.lex_string_literal(state) {
55 continue;
56 }
57
58 if self.lex_number_literal(state) {
59 continue;
60 }
61
62 if self.lex_identifier_or_keyword(state) {
63 continue;
64 }
65
66 if self.lex_operators(state) {
67 continue;
68 }
69
70 if self.lex_single_char_tokens(state) {
71 continue;
72 }
73
74 state.advance_if_dead_lock(safe_point);
75 }
76
77 Ok(())
78 }
79
    /// Consumes a run of whitespace into a single `Whitespace` token.
    /// Returns `true` when any input was consumed.
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        GRAPHQL_WHITESPACE.scan(state, GraphQLTokenType::Whitespace)
    }
84
    /// Consumes a `#`-to-end-of-line comment into a `Comment` token.
    /// Returns `true` when any input was consumed.
    // NOTE(review): both line and block token kinds are passed as `Comment`;
    // GRAPHQL_COMMENT declares empty block markers, so only the line form
    // should ever fire — confirm against CommentConfig::scan.
    fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        GRAPHQL_COMMENT.scan(state, GraphQLTokenType::Comment, GraphQLTokenType::Comment)
    }
89
90 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
92 if GRAPHQL_STRING.scan(state, GraphQLTokenType::StringLiteral) {
94 return true;
95 }
96
97 if state.starts_with("\"\"\"") {
99 let start = state.get_position();
100 state.advance(3); while state.not_at_end() {
103 if state.starts_with("\"\"\"") {
104 state.advance(3); break;
106 }
107 if let Some(ch) = state.peek() {
108 state.advance(ch.len_utf8());
109 }
110 }
111
112 let end = state.get_position();
113 state.add_token(GraphQLTokenType::StringLiteral, start, end);
114 return true;
115 }
116
117 false
118 }
119
120 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
122 let start = state.get_position();
123 let mut has_digits = false;
124 let mut is_float = false;
125
126 if state.starts_with("-") {
128 state.advance(1);
129 }
130
131 if state.starts_with("0") {
133 state.advance(1);
135 has_digits = true;
136 }
137 else {
138 while let Some(ch) = state.peek() {
140 if ch.is_ascii_digit() {
141 state.advance(ch.len_utf8());
142 has_digits = true;
143 }
144 else {
145 break;
146 }
147 }
148 }
149
150 if state.starts_with(".") && has_digits {
152 if let Some(next_ch) = state.peek_next_n(1) {
153 if next_ch.is_ascii_digit() {
154 state.advance(1); is_float = true;
156
157 while let Some(ch) = state.peek() {
158 if ch.is_ascii_digit() {
159 state.advance(ch.len_utf8());
160 }
161 else {
162 break;
163 }
164 }
165 }
166 }
167 }
168
169 if (state.starts_with("e") || state.starts_with("E")) && has_digits {
171 state.advance(1);
172 is_float = true;
173
174 if state.starts_with("+") || state.starts_with("-") {
176 state.advance(1);
177 }
178
179 let mut exp_digits = false;
181 while let Some(ch) = state.peek() {
182 if ch.is_ascii_digit() {
183 state.advance(ch.len_utf8());
184 exp_digits = true;
185 }
186 else {
187 break;
188 }
189 }
190 if !exp_digits {
191 return false;
192 }
193 }
194
195 if !has_digits {
196 return false;
197 }
198
199 let kind = if is_float { GraphQLTokenType::FloatLiteral } else { GraphQLTokenType::IntLiteral };
200 state.add_token(kind, start, state.get_position());
201 true
202 }
203
204 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
206 let start = state.get_position();
207
208 if let Some(first_ch) = state.peek() {
210 if !first_ch.is_alphabetic() && first_ch != '_' {
211 return false;
212 }
213
214 state.advance(first_ch.len_utf8());
215
216 while let Some(ch) = state.peek() {
218 if ch.is_alphanumeric() || ch == '_' {
219 state.advance(ch.len_utf8());
220 }
221 else {
222 break;
223 }
224 }
225
226 let end = state.get_position();
227 let text = state.get_text_in((start..end).into());
228 let kind = self.keyword_or_identifier(&text);
229 state.add_token(kind, start, end);
230 true
231 }
232 else {
233 false
234 }
235 }
236
    /// Maps a lexed name to its keyword token type, or `Name` when it is not
    /// a reserved word. GraphQL keywords are contextual, so the parser may
    /// still treat these keyword tokens as ordinary names where the grammar
    /// allows it.
    fn keyword_or_identifier(&self, text: &str) -> GraphQLTokenType {
        match text {
            // Operation and type-system definition keywords.
            "query" => GraphQLTokenType::QueryKeyword,
            "mutation" => GraphQLTokenType::MutationKeyword,
            "subscription" => GraphQLTokenType::SubscriptionKeyword,
            "fragment" => GraphQLTokenType::FragmentKeyword,
            "on" => GraphQLTokenType::OnKeyword,
            "type" => GraphQLTokenType::TypeKeyword,
            "interface" => GraphQLTokenType::InterfaceKeyword,
            "union" => GraphQLTokenType::UnionKeyword,
            "scalar" => GraphQLTokenType::ScalarKeyword,
            "enum" => GraphQLTokenType::EnumKeyword,
            "input" => GraphQLTokenType::InputKeyword,
            "extend" => GraphQLTokenType::ExtendKeyword,
            "schema" => GraphQLTokenType::SchemaKeyword,
            "directive" => GraphQLTokenType::DirectiveKeyword,
            "implements" => GraphQLTokenType::ImplementsKeyword,
            // NOTE(review): the GraphQL spec spells this keyword `repeatable`
            // (directive definitions); confirm `repeats` is an intentional
            // dialect extension and not a typo.
            "repeats" => GraphQLTokenType::RepeatsKeyword,

            // Literal keywords.
            "true" | "false" => GraphQLTokenType::BooleanLiteral,
            "null" => GraphQLTokenType::NullLiteral,

            // Anything else is an ordinary name.
            _ => GraphQLTokenType::Name,
        }
    }
266
267 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
269 let start = state.get_position();
270
271 if state.starts_with("...") {
273 state.advance(3);
274 state.add_token(GraphQLTokenType::Spread, start, state.get_position());
275 return true;
276 }
277
278 false
279 }
280
281 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
283 if let Some(ch) = state.peek() {
284 let start = state.get_position();
285 let kind = match ch {
286 '(' => Some(GraphQLTokenType::LeftParen),
287 ')' => Some(GraphQLTokenType::RightParen),
288 '[' => Some(GraphQLTokenType::LeftBracket),
289 ']' => Some(GraphQLTokenType::RightBracket),
290 '{' => Some(GraphQLTokenType::LeftBrace),
291 '}' => Some(GraphQLTokenType::RightBrace),
292 ',' => Some(GraphQLTokenType::Comma),
293 ':' => Some(GraphQLTokenType::Colon),
294 ';' => Some(GraphQLTokenType::Semicolon),
295 '|' => Some(GraphQLTokenType::Pipe),
296 '&' => Some(GraphQLTokenType::Ampersand),
297 '=' => Some(GraphQLTokenType::Equals),
298 '!' => Some(GraphQLTokenType::Exclamation),
299 '@' => Some(GraphQLTokenType::At),
300 '$' => Some(GraphQLTokenType::Dollar),
301 _ => None,
302 };
303
304 if let Some(token_kind) = kind {
305 state.advance(ch.len_utf8());
306 let end = state.get_position();
307 state.add_token(token_kind, start, end);
308 true
309 }
310 else {
311 false
312 }
313 }
314 else {
315 false
316 }
317 }
318}