1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::DotLanguage, lexer::token_type::DotTokenType};
6use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
7
/// Crate-local shorthand for the generic lexer state specialized to the DOT language.
pub(crate) type State<'a, S> = LexerState<'a, S, DotLanguage>;
9
/// Hand-written, rule-at-a-time lexer for the Graphviz DOT language.
///
/// Borrows its [`DotLanguage`] configuration for `'config`, so the lexer is
/// cheap to clone and carries no owned state of its own.
#[derive(Clone)]
pub struct DotLexer<'config> {
    // Language configuration this lexer was constructed with.
    // NOTE(review): not read anywhere in this file — confirm it is needed.
    config: &'config DotLanguage,
}
15
16impl<'config> DotLexer<'config> {
17 pub fn new(config: &'config DotLanguage) -> Self {
19 Self { config }
20 }
21
22 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
24 let start_pos = state.get_position();
25
26 while let Some(ch) = state.peek() {
27 if ch == ' ' || ch == '\t' {
28 state.advance(ch.len_utf8());
29 }
30 else {
31 break;
32 }
33 }
34
35 if state.get_position() > start_pos {
36 state.add_token(DotTokenType::Whitespace, start_pos, state.get_position());
37 true
38 }
39 else {
40 false
41 }
42 }
43
44 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
46 let start_pos = state.get_position();
47
48 if let Some('\n') = state.peek() {
49 state.advance(1);
50 state.add_token(DotTokenType::Newline, start_pos, state.get_position());
51 true
52 }
53 else if let Some('\r') = state.peek() {
54 state.advance(1);
55 if let Some('\n') = state.peek() {
56 state.advance(1);
57 }
58 state.add_token(DotTokenType::Newline, start_pos, state.get_position());
59 true
60 }
61 else {
62 false
63 }
64 }
65
66 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
68 let start_pos = state.get_position();
69
70 if state.consume_if_starts_with("//") {
71 while let Some(ch) = state.peek() {
73 if ch == '\n' || ch == '\r' {
74 break;
75 }
76 state.advance(ch.len_utf8());
77 }
78
79 state.add_token(DotTokenType::Comment, start_pos, state.get_position());
80 true
81 }
82 else if state.consume_if_starts_with("/*") {
83 while let Some(ch) = state.peek() {
85 if ch == '*' && state.peek_next_n(1) == Some('/') {
86 state.advance(2); break;
88 }
89 state.advance(ch.len_utf8());
90 }
91
92 state.add_token(DotTokenType::Comment, start_pos, state.get_position());
93 true
94 }
95 else if state.consume_if_starts_with("#") {
96 while let Some(ch) = state.peek() {
98 if ch == '\n' || ch == '\r' {
99 break;
100 }
101 state.advance(ch.len_utf8());
102 }
103
104 state.add_token(DotTokenType::Comment, start_pos, state.get_position());
105 true
106 }
107 else {
108 false
109 }
110 }
111
112 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
114 let start_pos = state.get_position();
115
116 if let Some(ch) = state.peek() {
117 if ch.is_alphabetic() || ch == '_' {
118 state.advance(ch.len_utf8());
119
120 while let Some(ch) = state.peek() {
121 if ch.is_alphanumeric() || ch == '_' {
122 state.advance(ch.len_utf8());
123 }
124 else {
125 break;
126 }
127 }
128
129 let end_pos = state.get_position();
130 let text = state.get_text_in((start_pos..end_pos).into());
131
132 let token_kind = match text.to_lowercase().as_str() {
133 "graph" => DotTokenType::Graph,
134 "digraph" => DotTokenType::Digraph,
135 "subgraph" => DotTokenType::Subgraph,
136 "node" => DotTokenType::Node,
137 "edge" => DotTokenType::Edge,
138 "strict" => DotTokenType::Strict,
139 _ => DotTokenType::Identifier,
140 };
141
142 state.add_token(token_kind, start_pos, state.get_position());
143 true
144 }
145 else {
146 false
147 }
148 }
149 else {
150 false
151 }
152 }
153
154 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
156 let start_pos = state.get_position();
157
158 if let Some(ch) = state.peek() {
159 let is_negative = ch == '-';
160 let mut has_digit = false;
161
162 if is_negative {
163 if let Some(next_ch) = state.peek_next_n(1) {
165 if next_ch.is_ascii_digit() {
166 state.advance(1); }
168 else {
169 return false;
170 }
171 }
172 else {
173 return false;
174 }
175 }
176
177 if let Some(ch) = state.peek() {
178 if ch.is_ascii_digit() {
179 has_digit = true;
180 state.advance(ch.len_utf8());
181
182 while let Some(ch) = state.peek() {
184 if ch.is_ascii_digit() {
185 state.advance(ch.len_utf8());
186 }
187 else {
188 break;
189 }
190 }
191
192 if let Some('.') = state.peek() {
194 let dot_pos = state.get_position();
195 state.advance(1);
196
197 if let Some(ch) = state.peek() {
198 if ch.is_ascii_digit() {
199 while let Some(ch) = state.peek() {
200 if ch.is_ascii_digit() {
201 state.advance(ch.len_utf8());
202 }
203 else {
204 break;
205 }
206 }
207 }
208 else {
209 state.set_position(dot_pos);
211 }
212 }
213 else {
214 state.set_position(dot_pos);
216 }
217 }
218 }
219 }
220
221 if has_digit || (is_negative && state.get_position() > start_pos + 1) {
222 state.add_token(DotTokenType::Number, start_pos, state.get_position());
223 true
224 }
225 else {
226 state.set_position(start_pos);
228 false
229 }
230 }
231 else {
232 false
233 }
234 }
235
236 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
238 let start_pos = state.get_position();
239
240 if let Some('"') = state.peek() {
241 state.advance(1);
242
243 while let Some(ch) = state.peek() {
244 if ch == '"' {
245 state.advance(1);
246 state.add_token(DotTokenType::String, start_pos, state.get_position());
247 return true;
248 }
249 else if ch == '\\' {
250 state.advance(1);
251 if state.peek().is_some() {
252 state.advance(1);
253 }
254 }
255 else {
256 state.advance(ch.len_utf8());
257 }
258 }
259
260 state.add_token(DotTokenType::Error, start_pos, state.get_position());
262 true
263 }
264 else {
265 false
266 }
267 }
268
269 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
271 let start_pos = state.get_position();
272
273 if state.consume_if_starts_with("->") {
274 state.add_token(DotTokenType::Arrow, start_pos, state.get_position());
275 return true;
276 }
277 if state.consume_if_starts_with("--") {
278 state.add_token(DotTokenType::Line, start_pos, state.get_position());
279 return true;
280 }
281
282 if let Some(ch) = state.peek() {
283 match ch {
284 '=' => {
285 state.advance(1);
286 state.add_token(DotTokenType::Equal, start_pos, state.get_position());
287 true
288 }
289 ';' => {
290 state.advance(1);
291 state.add_token(DotTokenType::Semicolon, start_pos, state.get_position());
292 true
293 }
294 ',' => {
295 state.advance(1);
296 state.add_token(DotTokenType::Comma, start_pos, state.get_position());
297 true
298 }
299 _ => false,
300 }
301 }
302 else {
303 false
304 }
305 }
306
307 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
309 let start_pos = state.get_position();
310
311 if let Some(ch) = state.peek() {
312 let token_kind = match ch {
313 '{' => DotTokenType::LeftBrace,
314 '}' => DotTokenType::RightBrace,
315 '[' => DotTokenType::LeftBracket,
316 ']' => DotTokenType::RightBracket,
317 '(' => DotTokenType::LeftParen,
318 ')' => DotTokenType::RightParen,
319 _ => return false,
320 };
321
322 state.advance(ch.len_utf8());
323 state.add_token(token_kind, start_pos, state.get_position());
324 true
325 }
326 else {
327 false
328 }
329 }
330}
331
332impl<'config> Lexer<DotLanguage> for DotLexer<'config> {
333 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<DotLanguage>) -> LexOutput<DotLanguage> {
334 let mut state = State::new(source);
335 let result = self.run(&mut state);
336 if result.is_ok() {
337 state.add_eof();
338 }
339 state.finish_with_cache(result, cache)
340 }
341}
342
343impl<'config> DotLexer<'config> {
344 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
346 while state.not_at_end() {
347 let safe_point = state.get_position();
348
349 if self.skip_whitespace(state) {
351 continue;
352 }
353
354 if self.lex_newline(state) {
355 continue;
356 }
357
358 if self.lex_comment(state) {
359 continue;
360 }
361
362 if self.lex_identifier_or_keyword(state) {
363 continue;
364 }
365
366 if self.lex_number(state) {
367 continue;
368 }
369
370 if self.lex_string(state) {
371 continue;
372 }
373
374 if self.lex_operator(state) {
375 continue;
376 }
377
378 if self.lex_delimiter(state) {
379 continue;
380 }
381
382 let start_pos = state.get_position();
384 if let Some(ch) = state.peek() {
385 state.advance(ch.len_utf8());
386 state.add_token(DotTokenType::Error, start_pos, state.get_position());
387 }
388
389 state.advance_if_dead_lock(safe_point);
390 }
391
392 Ok(())
393 }
394}