1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::DotLanguage, lexer::token_type::DotTokenType};
5use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
6
/// Shorthand for the generic lexer state specialized to the DOT language.
type State<'a, S> = LexerState<'a, S, DotLanguage>;
8
/// Lexer for the Graphviz DOT language.
///
/// Borrows the language configuration for the `'config` lifetime. The
/// configuration is currently unused (hence the leading underscore) but is
/// kept so future lexing options can be added without an interface change.
#[derive(Clone)]
pub struct DotLexer<'config> {
    // Unused for now; reserved for configuration-driven lexing behavior.
    _config: &'config DotLanguage,
}
13
14impl<'config> DotLexer<'config> {
15 pub fn new(config: &'config DotLanguage) -> Self {
16 Self { _config: config }
17 }
18
19 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
21 let start_pos = state.get_position();
22
23 while let Some(ch) = state.peek() {
24 if ch == ' ' || ch == '\t' {
25 state.advance(ch.len_utf8());
26 }
27 else {
28 break;
29 }
30 }
31
32 if state.get_position() > start_pos {
33 state.add_token(DotTokenType::Whitespace, start_pos, state.get_position());
34 true
35 }
36 else {
37 false
38 }
39 }
40
41 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
43 let start_pos = state.get_position();
44
45 if let Some('\n') = state.peek() {
46 state.advance(1);
47 state.add_token(DotTokenType::Newline, start_pos, state.get_position());
48 true
49 }
50 else if let Some('\r') = state.peek() {
51 state.advance(1);
52 if let Some('\n') = state.peek() {
53 state.advance(1);
54 }
55 state.add_token(DotTokenType::Newline, start_pos, state.get_position());
56 true
57 }
58 else {
59 false
60 }
61 }
62
63 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
65 let start_pos = state.get_position();
66
67 if state.consume_if_starts_with("//") {
68 while let Some(ch) = state.peek() {
70 if ch == '\n' || ch == '\r' {
71 break;
72 }
73 state.advance(ch.len_utf8());
74 }
75
76 state.add_token(DotTokenType::Comment, start_pos, state.get_position());
77 true
78 }
79 else if state.consume_if_starts_with("/*") {
80 while let Some(ch) = state.peek() {
82 if ch == '*' && state.peek_next_n(1) == Some('/') {
83 state.advance(2); break;
85 }
86 state.advance(ch.len_utf8());
87 }
88
89 state.add_token(DotTokenType::Comment, start_pos, state.get_position());
90 true
91 }
92 else if state.consume_if_starts_with("#") {
93 while let Some(ch) = state.peek() {
95 if ch == '\n' || ch == '\r' {
96 break;
97 }
98 state.advance(ch.len_utf8());
99 }
100
101 state.add_token(DotTokenType::Comment, start_pos, state.get_position());
102 true
103 }
104 else {
105 false
106 }
107 }
108
109 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
111 let start_pos = state.get_position();
112
113 if let Some(ch) = state.peek() {
114 if ch.is_alphabetic() || ch == '_' {
115 state.advance(ch.len_utf8());
116
117 while let Some(ch) = state.peek() {
118 if ch.is_alphanumeric() || ch == '_' {
119 state.advance(ch.len_utf8());
120 }
121 else {
122 break;
123 }
124 }
125
126 let end_pos = state.get_position();
127 let text = state.get_text_in((start_pos..end_pos).into());
128
129 let token_kind = match text.to_lowercase().as_str() {
130 "graph" => DotTokenType::Graph,
131 "digraph" => DotTokenType::Digraph,
132 "subgraph" => DotTokenType::Subgraph,
133 "node" => DotTokenType::Node,
134 "edge" => DotTokenType::Edge,
135 "strict" => DotTokenType::Strict,
136 _ => DotTokenType::Identifier,
137 };
138
139 state.add_token(token_kind, start_pos, state.get_position());
140 true
141 }
142 else {
143 false
144 }
145 }
146 else {
147 false
148 }
149 }
150
151 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
153 let start_pos = state.get_position();
154
155 if let Some(ch) = state.peek() {
156 let is_negative = ch == '-';
157 let mut has_digit = false;
158
159 if is_negative {
160 if let Some(next_ch) = state.peek_next_n(1) {
162 if next_ch.is_ascii_digit() {
163 state.advance(1); }
165 else {
166 return false;
167 }
168 }
169 else {
170 return false;
171 }
172 }
173
174 if let Some(ch) = state.peek() {
175 if ch.is_ascii_digit() {
176 has_digit = true;
177 state.advance(ch.len_utf8());
178
179 while let Some(ch) = state.peek() {
181 if ch.is_ascii_digit() {
182 state.advance(ch.len_utf8());
183 }
184 else {
185 break;
186 }
187 }
188
189 if let Some('.') = state.peek() {
191 let dot_pos = state.get_position();
192 state.advance(1);
193
194 if let Some(ch) = state.peek() {
195 if ch.is_ascii_digit() {
196 while let Some(ch) = state.peek() {
197 if ch.is_ascii_digit() {
198 state.advance(ch.len_utf8());
199 }
200 else {
201 break;
202 }
203 }
204 }
205 else {
206 state.set_position(dot_pos);
208 }
209 }
210 else {
211 state.set_position(dot_pos);
213 }
214 }
215 }
216 }
217
218 if has_digit || (is_negative && state.get_position() > start_pos + 1) {
219 state.add_token(DotTokenType::Number, start_pos, state.get_position());
220 true
221 }
222 else {
223 state.set_position(start_pos);
225 false
226 }
227 }
228 else {
229 false
230 }
231 }
232
233 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
235 let start_pos = state.get_position();
236
237 if let Some('"') = state.peek() {
238 state.advance(1);
239
240 while let Some(ch) = state.peek() {
241 if ch == '"' {
242 state.advance(1);
243 state.add_token(DotTokenType::String, start_pos, state.get_position());
244 return true;
245 }
246 else if ch == '\\' {
247 state.advance(1);
248 if state.peek().is_some() {
249 state.advance(1);
250 }
251 }
252 else {
253 state.advance(ch.len_utf8());
254 }
255 }
256
257 state.add_token(DotTokenType::Error, start_pos, state.get_position());
259 true
260 }
261 else {
262 false
263 }
264 }
265
266 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
268 let start_pos = state.get_position();
269
270 if state.consume_if_starts_with("->") {
271 state.add_token(DotTokenType::Arrow, start_pos, state.get_position());
272 return true;
273 }
274 if state.consume_if_starts_with("--") {
275 state.add_token(DotTokenType::Line, start_pos, state.get_position());
276 return true;
277 }
278
279 if let Some(ch) = state.peek() {
280 match ch {
281 '=' => {
282 state.advance(1);
283 state.add_token(DotTokenType::Equal, start_pos, state.get_position());
284 true
285 }
286 ';' => {
287 state.advance(1);
288 state.add_token(DotTokenType::Semicolon, start_pos, state.get_position());
289 true
290 }
291 ',' => {
292 state.advance(1);
293 state.add_token(DotTokenType::Comma, start_pos, state.get_position());
294 true
295 }
296 _ => false,
297 }
298 }
299 else {
300 false
301 }
302 }
303
304 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
306 let start_pos = state.get_position();
307
308 if let Some(ch) = state.peek() {
309 let token_kind = match ch {
310 '{' => DotTokenType::LeftBrace,
311 '}' => DotTokenType::RightBrace,
312 '[' => DotTokenType::LeftBracket,
313 ']' => DotTokenType::RightBracket,
314 '(' => DotTokenType::LeftParen,
315 ')' => DotTokenType::RightParen,
316 _ => return false,
317 };
318
319 state.advance(ch.len_utf8());
320 state.add_token(token_kind, start_pos, state.get_position());
321 true
322 }
323 else {
324 false
325 }
326 }
327}
328
329impl<'config> Lexer<DotLanguage> for DotLexer<'config> {
330 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<DotLanguage>) -> LexOutput<DotLanguage> {
331 let mut state = State::new(source);
332 let result = self.run(&mut state);
333 if result.is_ok() {
334 state.add_eof();
335 }
336 state.finish_with_cache(result, cache)
337 }
338}
339
340impl<'config> DotLexer<'config> {
341 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
343 while state.not_at_end() {
344 let safe_point = state.get_position();
345
346 if self.skip_whitespace(state) {
348 continue;
349 }
350
351 if self.lex_newline(state) {
352 continue;
353 }
354
355 if self.lex_comment(state) {
356 continue;
357 }
358
359 if self.lex_identifier_or_keyword(state) {
360 continue;
361 }
362
363 if self.lex_number(state) {
364 continue;
365 }
366
367 if self.lex_string(state) {
368 continue;
369 }
370
371 if self.lex_operator(state) {
372 continue;
373 }
374
375 if self.lex_delimiter(state) {
376 continue;
377 }
378
379 let start_pos = state.get_position();
381 if let Some(ch) = state.peek() {
382 state.advance(ch.len_utf8());
383 state.add_token(DotTokenType::Error, start_pos, state.get_position());
384 }
385
386 state.advance_if_dead_lock(safe_point);
387 }
388
389 Ok(())
390 }
391}