1use crate::{kind::DotSyntaxKind, language::DotLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
3
/// Lexer for the DOT graph description language.
///
/// Borrows the shared [`DotLanguage`] configuration for the lifetime
/// `'config`; the lexer itself is cheap to clone.
#[derive(Clone)]
pub struct DotLexer<'config> {
    // Language configuration. NOTE(review): stored but not read by any
    // method visible in this file — presumably consumed elsewhere; confirm.
    config: &'config DotLanguage,
}
10
11impl<'config> DotLexer<'config> {
12 pub fn new(config: &'config DotLanguage) -> Self {
13 Self { config }
14 }
15
16 fn skip_whitespace<S: Source>(&self, state: &mut LexerState<S, DotLanguage>) -> bool {
18 let start_pos = state.get_position();
19
20 while let Some(ch) = state.peek() {
21 if ch == ' ' || ch == '\t' {
22 state.advance(ch.len_utf8());
23 }
24 else {
25 break;
26 }
27 }
28
29 if state.get_position() > start_pos {
30 state.add_token(DotSyntaxKind::Whitespace, start_pos, state.get_position());
31 true
32 }
33 else {
34 false
35 }
36 }
37
38 fn lex_newline<S: Source>(&self, state: &mut LexerState<S, DotLanguage>) -> bool {
40 let start_pos = state.get_position();
41
42 if let Some('\n') = state.peek() {
43 state.advance(1);
44 state.add_token(DotSyntaxKind::Newline, start_pos, state.get_position());
45 true
46 }
47 else if let Some('\r') = state.peek() {
48 state.advance(1);
49 if let Some('\n') = state.peek() {
50 state.advance(1);
51 }
52 state.add_token(DotSyntaxKind::Newline, start_pos, state.get_position());
53 true
54 }
55 else {
56 false
57 }
58 }
59
60 fn lex_comment<S: Source>(&self, state: &mut LexerState<S, DotLanguage>) -> bool {
62 let start_pos = state.get_position();
63
64 if let Some('/') = state.peek() {
65 match state.peek_next_n(1) {
66 Some('/') => {
67 state.advance(1);
69 state.advance(1);
70
71 while let Some(ch) = state.peek() {
72 if ch == '\n' || ch == '\r' {
73 break;
74 }
75 state.advance(ch.len_utf8());
76 }
77
78 state.add_token(DotSyntaxKind::Comment, start_pos, state.get_position());
79 true
80 }
81 Some('*') => {
82 state.advance(1);
84 state.advance(1);
85
86 while let Some(ch) = state.peek() {
87 if ch == '*' {
88 if state.peek_next_n(1) == Some('/') {
89 state.advance(1);
90 state.advance(1);
91 break;
92 }
93 }
94 state.advance(ch.len_utf8());
95 }
96
97 state.add_token(DotSyntaxKind::Comment, start_pos, state.get_position());
98 true
99 }
100 _ => false,
101 }
102 }
103 else if let Some('#') = state.peek() {
104 state.advance(1);
106
107 while let Some(ch) = state.peek() {
108 if ch == '\n' || ch == '\r' {
109 break;
110 }
111 state.advance(ch.len_utf8());
112 }
113
114 state.add_token(DotSyntaxKind::Comment, start_pos, state.get_position());
115 true
116 }
117 else {
118 false
119 }
120 }
121
122 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut LexerState<S, DotLanguage>) -> bool {
124 let start_pos = state.get_position();
125
126 if let Some(ch) = state.peek() {
127 if ch.is_alphabetic() || ch == '_' {
128 state.advance(ch.len_utf8());
129
130 while let Some(ch) = state.peek() {
131 if ch.is_alphanumeric() || ch == '_' {
132 state.advance(ch.len_utf8());
133 }
134 else {
135 break;
136 }
137 }
138
139 let end_pos = state.get_position();
140 let text = state.get_text_in(core::range::Range { start: start_pos, end: end_pos });
141
142 let token_kind = match text.to_lowercase().as_str() {
143 "graph" => DotSyntaxKind::Graph,
144 "digraph" => DotSyntaxKind::Digraph,
145 "subgraph" => DotSyntaxKind::Subgraph,
146 "node" => DotSyntaxKind::Node,
147 "edge" => DotSyntaxKind::Edge,
148 "strict" => DotSyntaxKind::Strict,
149 _ => DotSyntaxKind::Identifier,
150 };
151
152 state.add_token(token_kind, start_pos, state.get_position());
153 true
154 }
155 else {
156 false
157 }
158 }
159 else {
160 false
161 }
162 }
163
164 fn lex_number<S: Source>(&self, state: &mut LexerState<S, DotLanguage>) -> bool {
166 let start_pos = state.get_position();
167
168 if let Some(ch) = state.peek() {
169 let is_negative = ch == '-';
170 let mut has_digit = false;
171
172 if is_negative {
173 let next_pos = state.get_position() + 1;
175 if next_pos < state.length() {
176 let next_ch = state.get_char_at(next_pos);
177 if next_ch.map_or(false, |c| c.is_ascii_digit()) {
178 state.advance(1); }
180 else {
181 return false;
182 }
183 }
184 else {
185 return false;
186 }
187 }
188
189 if let Some(ch) = state.peek() {
190 if ch.is_ascii_digit() {
191 has_digit = true;
192 state.advance(ch.len_utf8());
193
194 while let Some(ch) = state.peek() {
196 if ch.is_ascii_digit() {
197 state.advance(ch.len_utf8());
198 }
199 else {
200 break;
201 }
202 }
203
204 if let Some('.') = state.peek() {
206 let dot_pos = state.get_position();
207 state.advance(1);
208
209 if let Some(ch) = state.peek() {
210 if ch.is_ascii_digit() {
211 while let Some(ch) = state.peek() {
212 if ch.is_ascii_digit() {
213 state.advance(ch.len_utf8());
214 }
215 else {
216 break;
217 }
218 }
219 }
220 else {
221 state.set_position(dot_pos);
223 }
224 }
225 else {
226 state.set_position(dot_pos);
228 }
229 }
230 }
231 }
232
233 if has_digit || (is_negative && state.get_position() > start_pos + 1) {
234 state.add_token(DotSyntaxKind::Number, start_pos, state.get_position());
235 true
236 }
237 else {
238 false
240 }
241 }
242 else {
243 false
244 }
245 }
246
247 fn lex_string<S: Source>(&self, state: &mut LexerState<S, DotLanguage>) -> bool {
249 let start_pos = state.get_position();
250
251 if let Some('"') = state.peek() {
252 state.advance(1);
253
254 while let Some(ch) = state.peek() {
255 if ch == '"' {
256 state.advance(1);
257 state.add_token(DotSyntaxKind::String, start_pos, state.get_position());
258 return true;
259 }
260 else if ch == '\\' {
261 state.advance(1);
262 if state.peek().is_some() {
263 state.advance(1);
264 }
265 }
266 else {
267 state.advance(ch.len_utf8());
268 }
269 }
270
271 true
273 }
274 else {
275 false
276 }
277 }
278
279 fn lex_operator<S: Source>(&self, state: &mut LexerState<S, DotLanguage>) -> bool {
281 let start_pos = state.get_position();
282
283 if let Some(ch) = state.peek() {
284 match ch {
285 '-' => {
286 let next_pos = state.get_position() + 1;
287 if next_pos < state.length() {
288 let next_ch = state.get_char_at(next_pos);
289 match next_ch {
290 Some('>') => {
291 state.advance(1);
292 state.advance(1);
293 state.add_token(DotSyntaxKind::Arrow, start_pos, state.get_position());
294 true
295 }
296 Some('-') => {
297 state.advance(1);
298 state.advance(1);
299 state.add_token(DotSyntaxKind::Line, start_pos, state.get_position());
300 true
301 }
302 _ => false,
303 }
304 }
305 else {
306 false
307 }
308 }
309 '=' => {
310 state.advance(1);
311 state.add_token(DotSyntaxKind::Equal, start_pos, state.get_position());
312 true
313 }
314 ';' => {
315 state.advance(1);
316 state.add_token(DotSyntaxKind::Semicolon, start_pos, state.get_position());
317 true
318 }
319 ',' => {
320 state.advance(1);
321 state.add_token(DotSyntaxKind::Comma, start_pos, state.get_position());
322 true
323 }
324 _ => false,
325 }
326 }
327 else {
328 false
329 }
330 }
331
332 fn lex_delimiter<S: Source>(&self, state: &mut LexerState<S, DotLanguage>) -> bool {
334 let start_pos = state.get_position();
335
336 if let Some(ch) = state.peek() {
337 let token_kind = match ch {
338 '{' => DotSyntaxKind::LeftBrace,
339 '}' => DotSyntaxKind::RightBrace,
340 '[' => DotSyntaxKind::LeftBracket,
341 ']' => DotSyntaxKind::RightBracket,
342 '(' => DotSyntaxKind::LeftParen,
343 ')' => DotSyntaxKind::RightParen,
344 _ => return false,
345 };
346
347 state.advance(ch.len_utf8());
348 state.add_token(token_kind, start_pos, state.get_position());
349 true
350 }
351 else {
352 false
353 }
354 }
355}
356
357impl<'config> Lexer<DotLanguage> for DotLexer<'config> {
358 fn lex(&self, source: impl Source) -> LexOutput<DotLanguage> {
359 let mut state = LexerState::new(source);
360 let result = self.run(&mut state);
361 state.finish(result)
362 }
363
364 fn lex_incremental(
365 &self,
366 source: impl Source,
367 changed: usize,
368 cache: IncrementalCache<DotLanguage>,
369 ) -> LexOutput<DotLanguage> {
370 let mut state = LexerState::new_with_cache(source, changed, cache);
371 let result = self.run(&mut state);
372 state.finish(result)
373 }
374}
375
376impl<'config> DotLexer<'config> {
377 fn run<S: Source>(&self, state: &mut LexerState<S, DotLanguage>) -> Result<(), OakError> {
379 while state.not_at_end() {
380 let safe_point = state.get_position();
381
382 if self.skip_whitespace(state) {
384 continue;
385 }
386
387 if self.lex_newline(state) {
388 continue;
389 }
390
391 if self.lex_comment(state) {
392 continue;
393 }
394
395 if self.lex_identifier_or_keyword(state) {
396 continue;
397 }
398
399 if self.lex_number(state) {
400 continue;
401 }
402
403 if self.lex_string(state) {
404 continue;
405 }
406
407 if self.lex_operator(state) {
408 continue;
409 }
410
411 if self.lex_delimiter(state) {
412 continue;
413 }
414
415 let start_pos = state.get_position();
417 if let Some(ch) = state.peek() {
418 state.advance(ch.len_utf8());
419 state.add_token(DotSyntaxKind::Error, start_pos, state.get_position());
420 }
421
422 state.safe_check(safe_point);
423 }
424
425 let eof_pos = state.get_position();
427 state.add_token(DotSyntaxKind::Eof, eof_pos, eof_pos);
428
429 Ok(())
430 }
431}