1use crate::{kind::DotSyntaxKind, language::DotLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, DotLanguage>;
5
/// Lexer that turns DOT source text into a stream of [`DotSyntaxKind`] tokens.
///
/// The lexer borrows its [`DotLanguage`] configuration for the `'config`
/// lifetime. None of the lexing routines in this file read the configuration
/// (hence the leading underscore on the field name).
#[derive(Clone)]
pub struct DotLexer<'config> {
    /// Language configuration this lexer was created with.
    _config: &'config DotLanguage,
}
10
11impl<'config> DotLexer<'config> {
12 pub fn new(config: &'config DotLanguage) -> Self {
13 Self { _config: config }
14 }
15
16 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
18 let start_pos = state.get_position();
19
20 while let Some(ch) = state.peek() {
21 if ch == ' ' || ch == '\t' {
22 state.advance(ch.len_utf8());
23 }
24 else {
25 break;
26 }
27 }
28
29 if state.get_position() > start_pos {
30 state.add_token(DotSyntaxKind::Whitespace, start_pos, state.get_position());
31 true
32 }
33 else {
34 false
35 }
36 }
37
38 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
40 let start_pos = state.get_position();
41
42 if let Some('\n') = state.peek() {
43 state.advance(1);
44 state.add_token(DotSyntaxKind::Newline, start_pos, state.get_position());
45 true
46 }
47 else if let Some('\r') = state.peek() {
48 state.advance(1);
49 if let Some('\n') = state.peek() {
50 state.advance(1);
51 }
52 state.add_token(DotSyntaxKind::Newline, start_pos, state.get_position());
53 true
54 }
55 else {
56 false
57 }
58 }
59
60 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
62 let start_pos = state.get_position();
63
64 if state.consume_if_starts_with("//") {
65 while let Some(ch) = state.peek() {
67 if ch == '\n' || ch == '\r' {
68 break;
69 }
70 state.advance(ch.len_utf8());
71 }
72
73 state.add_token(DotSyntaxKind::Comment, start_pos, state.get_position());
74 true
75 }
76 else if state.consume_if_starts_with("/*") {
77 while let Some(ch) = state.peek() {
79 if ch == '*' && state.peek_next_n(1) == Some('/') {
80 state.advance(2); break;
82 }
83 state.advance(ch.len_utf8());
84 }
85
86 state.add_token(DotSyntaxKind::Comment, start_pos, state.get_position());
87 true
88 }
89 else if state.consume_if_starts_with("#") {
90 while let Some(ch) = state.peek() {
92 if ch == '\n' || ch == '\r' {
93 break;
94 }
95 state.advance(ch.len_utf8());
96 }
97
98 state.add_token(DotSyntaxKind::Comment, start_pos, state.get_position());
99 true
100 }
101 else {
102 false
103 }
104 }
105
106 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
108 let start_pos = state.get_position();
109
110 if let Some(ch) = state.peek() {
111 if ch.is_alphabetic() || ch == '_' {
112 state.advance(ch.len_utf8());
113
114 while let Some(ch) = state.peek() {
115 if ch.is_alphanumeric() || ch == '_' {
116 state.advance(ch.len_utf8());
117 }
118 else {
119 break;
120 }
121 }
122
123 let end_pos = state.get_position();
124 let text = state.get_text_in((start_pos..end_pos).into());
125
126 let token_kind = match text.to_lowercase().as_str() {
127 "graph" => DotSyntaxKind::Graph,
128 "digraph" => DotSyntaxKind::Digraph,
129 "subgraph" => DotSyntaxKind::Subgraph,
130 "node" => DotSyntaxKind::Node,
131 "edge" => DotSyntaxKind::Edge,
132 "strict" => DotSyntaxKind::Strict,
133 _ => DotSyntaxKind::Identifier,
134 };
135
136 state.add_token(token_kind, start_pos, state.get_position());
137 true
138 }
139 else {
140 false
141 }
142 }
143 else {
144 false
145 }
146 }
147
148 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
150 let start_pos = state.get_position();
151
152 if let Some(ch) = state.peek() {
153 let is_negative = ch == '-';
154 let mut has_digit = false;
155
156 if is_negative {
157 if let Some(next_ch) = state.peek_next_n(1) {
159 if next_ch.is_ascii_digit() {
160 state.advance(1); }
162 else {
163 return false;
164 }
165 }
166 else {
167 return false;
168 }
169 }
170
171 if let Some(ch) = state.peek() {
172 if ch.is_ascii_digit() {
173 has_digit = true;
174 state.advance(ch.len_utf8());
175
176 while let Some(ch) = state.peek() {
178 if ch.is_ascii_digit() {
179 state.advance(ch.len_utf8());
180 }
181 else {
182 break;
183 }
184 }
185
186 if let Some('.') = state.peek() {
188 let dot_pos = state.get_position();
189 state.advance(1);
190
191 if let Some(ch) = state.peek() {
192 if ch.is_ascii_digit() {
193 while let Some(ch) = state.peek() {
194 if ch.is_ascii_digit() {
195 state.advance(ch.len_utf8());
196 }
197 else {
198 break;
199 }
200 }
201 }
202 else {
203 state.set_position(dot_pos);
205 }
206 }
207 else {
208 state.set_position(dot_pos);
210 }
211 }
212 }
213 }
214
215 if has_digit || (is_negative && state.get_position() > start_pos + 1) {
216 state.add_token(DotSyntaxKind::Number, start_pos, state.get_position());
217 true
218 }
219 else {
220 state.set_position(start_pos);
222 false
223 }
224 }
225 else {
226 false
227 }
228 }
229
230 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
232 let start_pos = state.get_position();
233
234 if let Some('"') = state.peek() {
235 state.advance(1);
236
237 while let Some(ch) = state.peek() {
238 if ch == '"' {
239 state.advance(1);
240 state.add_token(DotSyntaxKind::String, start_pos, state.get_position());
241 return true;
242 }
243 else if ch == '\\' {
244 state.advance(1);
245 if state.peek().is_some() {
246 state.advance(1);
247 }
248 }
249 else {
250 state.advance(ch.len_utf8());
251 }
252 }
253
254 state.add_token(DotSyntaxKind::Error, start_pos, state.get_position());
256 true
257 }
258 else {
259 false
260 }
261 }
262
263 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
265 let start_pos = state.get_position();
266
267 if state.consume_if_starts_with("->") {
268 state.add_token(DotSyntaxKind::Arrow, start_pos, state.get_position());
269 return true;
270 }
271 if state.consume_if_starts_with("--") {
272 state.add_token(DotSyntaxKind::Line, start_pos, state.get_position());
273 return true;
274 }
275
276 if let Some(ch) = state.peek() {
277 match ch {
278 '=' => {
279 state.advance(1);
280 state.add_token(DotSyntaxKind::Equal, start_pos, state.get_position());
281 true
282 }
283 ';' => {
284 state.advance(1);
285 state.add_token(DotSyntaxKind::Semicolon, start_pos, state.get_position());
286 true
287 }
288 ',' => {
289 state.advance(1);
290 state.add_token(DotSyntaxKind::Comma, start_pos, state.get_position());
291 true
292 }
293 _ => false,
294 }
295 }
296 else {
297 false
298 }
299 }
300
301 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
303 let start_pos = state.get_position();
304
305 if let Some(ch) = state.peek() {
306 let token_kind = match ch {
307 '{' => DotSyntaxKind::LeftBrace,
308 '}' => DotSyntaxKind::RightBrace,
309 '[' => DotSyntaxKind::LeftBracket,
310 ']' => DotSyntaxKind::RightBracket,
311 '(' => DotSyntaxKind::LeftParen,
312 ')' => DotSyntaxKind::RightParen,
313 _ => return false,
314 };
315
316 state.advance(ch.len_utf8());
317 state.add_token(token_kind, start_pos, state.get_position());
318 true
319 }
320 else {
321 false
322 }
323 }
324}
325
326impl<'config> Lexer<DotLanguage> for DotLexer<'config> {
327 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<DotLanguage>) -> LexOutput<DotLanguage> {
328 let mut state = State::new(source);
329 let result = self.run(&mut state);
330 if result.is_ok() {
331 state.add_eof();
332 }
333 state.finish_with_cache(result, cache)
334 }
335}
336
337impl<'config> DotLexer<'config> {
338 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
340 while state.not_at_end() {
341 let safe_point = state.get_position();
342
343 if self.skip_whitespace(state) {
345 continue;
346 }
347
348 if self.lex_newline(state) {
349 continue;
350 }
351
352 if self.lex_comment(state) {
353 continue;
354 }
355
356 if self.lex_identifier_or_keyword(state) {
357 continue;
358 }
359
360 if self.lex_number(state) {
361 continue;
362 }
363
364 if self.lex_string(state) {
365 continue;
366 }
367
368 if self.lex_operator(state) {
369 continue;
370 }
371
372 if self.lex_delimiter(state) {
373 continue;
374 }
375
376 let start_pos = state.get_position();
378 if let Some(ch) = state.peek() {
379 state.advance(ch.len_utf8());
380 state.add_token(DotSyntaxKind::Error, start_pos, state.get_position());
381 }
382
383 state.advance_if_dead_lock(safe_point);
384 }
385
386 Ok(())
387 }
388}