1use std::collections::HashMap;
4
/// Classification of every lexical element the tokenizer can produce.
///
/// Trivia (whitespace, newlines, comments) gets its own variants rather
/// than being discarded, and the stream is always terminated by `Eof`.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    // Keywords recognized by `consume_identifier`.
    Graph,
    Node,
    Edge,
    Query,
    Fn,
    If,
    For,
    While,
    Return,

    // Punctuation and delimiters.
    LeftBrace,
    RightBrace,
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    Semicolon,
    Colon,
    Comma,
    Dot,
    Arrow,

    // Operators. `Assign` is single `=`; `Equal` is `==`.
    Assign,
    Equal,
    NotEqual,
    Less,
    Greater,
    LessEqual,
    GreaterEqual,
    Plus,
    Minus,
    Star,
    Slash,
    And,
    Or,

    // Literals and names. `Boolean` covers the `true`/`false` keywords.
    Identifier,
    String,
    Number,
    Boolean,

    // Trivia kept in the token stream, plus the end-of-input marker.
    Comment,
    Whitespace,
    Newline,
    Eof,
}
59
/// A single lexed token: its classification, raw source text, and the
/// source location recorded when it was emitted.
#[derive(Debug, Clone)]
pub struct Token {
    // What kind of token this is.
    pub kind: TokenKind,
    // The raw text of the token as it appeared in the input
    // (e.g. a string token includes its surrounding quotes).
    pub text: String,
    // 1-based line number, derived in `Parser::add_token` by counting
    // newlines up to the current position.
    pub line: usize,
    // Column as computed by `Parser::add_token` — a byte offset from the
    // last newline. NOTE(review): for multi-char tokens the position used
    // is the token's END, not its start — confirm this is intended.
    pub column: usize,
}
68
69impl Token {
70 pub fn new(kind: TokenKind, text: String, line: usize, column: usize) -> Self {
71 Self {
72 kind,
73 text,
74 line,
75 column,
76 }
77 }
78}
79
/// Cursor-based tokenizer state. Despite the name, this type only lexes:
/// `tokenize` turns `input` into a flat `Vec<Token>`.
#[derive(Debug)]
pub struct Parser {
    // The full source text being scanned.
    input: String,
    // Tokens accumulated by `tokenize`; cleared on each call.
    tokens: Vec<Token>,
    // Current BYTE offset into `input` (not a char index).
    position: usize,
}
87
88impl Parser {
89 pub fn new(input: String) -> Self {
91 Self {
92 input,
93 tokens: Vec::new(),
94 position: 0,
95 }
96 }
97
98 pub fn tokenize(&mut self) -> Result<&[Token], Box<dyn std::error::Error>> {
100 self.tokens.clear();
101 self.position = 0;
102
103 while self.position < self.input.len() {
104 let ch = self.current_char();
105
106 match ch {
107 ' ' | '\t' => {
108 self.consume_whitespace();
109 }
110 '\n' | '\r' => {
111 self.consume_newline();
112 }
113 '/' => {
114 if self.peek_char() == Some('/') {
115 self.consume_comment();
116 } else if self.peek_char() == Some('*') {
117 self.consume_multiline_comment();
118 } else {
119 self.add_token(TokenKind::Slash, "/".to_string());
120 self.position += 1;
121 }
122 }
123 '{' => {
124 self.add_token(TokenKind::LeftBrace, "{".to_string());
125 self.position += 1;
126 }
127 '}' => {
128 self.add_token(TokenKind::RightBrace, "}".to_string());
129 self.position += 1;
130 }
131 '(' => {
132 self.add_token(TokenKind::LeftParen, "(".to_string());
133 self.position += 1;
134 }
135 ')' => {
136 self.add_token(TokenKind::RightParen, ")".to_string());
137 self.position += 1;
138 }
139 '[' => {
140 self.add_token(TokenKind::LeftBracket, "[".to_string());
141 self.position += 1;
142 }
143 ']' => {
144 self.add_token(TokenKind::RightBracket, "]".to_string());
145 self.position += 1;
146 }
147 ';' => {
148 self.add_token(TokenKind::Semicolon, ";".to_string());
149 self.position += 1;
150 }
151 ':' => {
152 self.add_token(TokenKind::Colon, ":".to_string());
153 self.position += 1;
154 }
155 ',' => {
156 self.add_token(TokenKind::Comma, ",".to_string());
157 self.position += 1;
158 }
159 '.' => {
160 self.add_token(TokenKind::Dot, ".".to_string());
161 self.position += 1;
162 }
163 '=' => {
164 if self.peek_char() == Some('=') {
165 self.add_token(TokenKind::Equal, "==".to_string());
166 self.position += 2;
167 } else {
168 self.add_token(TokenKind::Assign, "=".to_string());
169 self.position += 1;
170 }
171 }
172 '!' => {
173 if self.peek_char() == Some('=') {
174 self.add_token(TokenKind::NotEqual, "!=".to_string());
175 self.position += 2;
176 } else {
177 self.consume_identifier();
179 }
180 }
181 '<' => {
182 if self.peek_char() == Some('=') {
183 self.add_token(TokenKind::LessEqual, "<=".to_string());
184 self.position += 2;
185 } else {
186 self.add_token(TokenKind::Less, "<".to_string());
187 self.position += 1;
188 }
189 }
190 '>' => {
191 if self.peek_char() == Some('=') {
192 self.add_token(TokenKind::GreaterEqual, ">=".to_string());
193 self.position += 2;
194 } else {
195 self.add_token(TokenKind::Greater, ">".to_string());
196 self.position += 1;
197 }
198 }
199 '+' => {
200 self.add_token(TokenKind::Plus, "+".to_string());
201 self.position += 1;
202 }
203 '-' => {
204 if self.peek_char() == Some('>') {
205 self.add_token(TokenKind::Arrow, "->".to_string());
206 self.position += 2;
207 } else {
208 self.add_token(TokenKind::Minus, "-".to_string());
209 self.position += 1;
210 }
211 }
212 '*' => {
213 self.add_token(TokenKind::Star, "*".to_string());
214 self.position += 1;
215 }
216 '&' => {
217 if self.peek_char() == Some('&') {
218 self.add_token(TokenKind::And, "&&".to_string());
219 self.position += 2;
220 } else {
221 self.consume_identifier();
223 }
224 }
225 '|' => {
226 if self.peek_char() == Some('|') {
227 self.add_token(TokenKind::Or, "||".to_string());
228 self.position += 2;
229 } else {
230 self.consume_identifier();
232 }
233 }
234 '"' => {
235 self.consume_string();
236 }
237 '0'..='9' => {
238 self.consume_number();
239 }
240 'a'..='z' | 'A'..='Z' | '_' => {
241 self.consume_identifier();
242 }
243 _ => {
244 self.position += 1;
246 }
247 }
248 }
249
250 self.add_token(TokenKind::Eof, "".to_string());
251 Ok(&self.tokens)
252 }
253
254 fn current_char(&self) -> char {
256 self.input[self.position..].chars().next().unwrap()
257 }
258
259 fn peek_char(&self) -> Option<char> {
261 self.input[self.position + 1..].chars().next()
262 }
263
264 fn consume_whitespace(&mut self) {
266 let start = self.position;
267 while self.position < self.input.len() &&
268 matches!(self.current_char(), ' ' | '\t') {
269 self.position += 1;
270 }
271
272 if start != self.position {
273 let text = self.input[start..self.position].to_string();
274 self.add_token(TokenKind::Whitespace, text);
275 }
276 }
277
278 fn consume_newline(&mut self) {
280 let start = self.position;
281 while self.position < self.input.len() &&
282 matches!(self.current_char(), '\n' | '\r') {
283 self.position += 1;
284 }
285
286 if start != self.position {
287 let text = self.input[start..self.position].to_string();
288 self.add_token(TokenKind::Newline, text);
289 }
290 }
291
292 fn consume_comment(&mut self) {
294 let start = self.position;
295 while self.position < self.input.len() && self.current_char() != '\n' {
296 self.position += 1;
297 }
298
299 let text = self.input[start..self.position].to_string();
300 self.add_token(TokenKind::Comment, text);
301 }
302
303 fn consume_multiline_comment(&mut self) {
305 let start = self.position;
306 self.position += 2; while self.position < self.input.len() - 1 {
309 if self.current_char() == '*' && self.peek_char() == Some('/') {
310 self.position += 2;
311 break;
312 }
313 self.position += 1;
314 }
315
316 let text = self.input[start..self.position].to_string();
317 self.add_token(TokenKind::Comment, text);
318 }
319
320 fn consume_string(&mut self) {
322 let start = self.position;
323 self.position += 1; while self.position < self.input.len() && self.current_char() != '"' {
326 if self.current_char() == '\\' {
327 self.position += 2; } else {
329 self.position += 1;
330 }
331 }
332
333 if self.position < self.input.len() {
334 self.position += 1; }
336
337 let text = self.input[start..self.position].to_string();
338 self.add_token(TokenKind::String, text);
339 }
340
341 fn consume_number(&mut self) {
343 let start = self.position;
344
345 while self.position < self.input.len() &&
346 matches!(self.current_char(), '0'..='9' | '.' | 'e' | 'E' | '+' | '-') {
347 self.position += 1;
348 }
349
350 let text = self.input[start..self.position].to_string();
351 self.add_token(TokenKind::Number, text);
352 }
353
354 fn consume_identifier(&mut self) {
356 let start = self.position;
357
358 while self.position < self.input.len() &&
359 matches!(self.current_char(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
360 self.position += 1;
361 }
362
363 let text = self.input[start..self.position].to_string();
364
365 let kind = match text.as_str() {
367 "graph" => TokenKind::Graph,
368 "node" => TokenKind::Node,
369 "edge" => TokenKind::Edge,
370 "query" => TokenKind::Query,
371 "fn" => TokenKind::Fn,
372 "if" => TokenKind::If,
373 "for" => TokenKind::For,
374 "while" => TokenKind::While,
375 "return" => TokenKind::Return,
376 "true" | "false" => TokenKind::Boolean,
377 _ => TokenKind::Identifier,
378 };
379
380 self.add_token(kind, text);
381 }
382
383 fn add_token(&mut self, kind: TokenKind, text: String) {
385 let line = self.input[..self.position].chars().filter(|&c| c == '\n').count() + 1;
387 let last_newline = self.input[..self.position].rfind('\n').unwrap_or(0);
388 let column = self.position - last_newline;
389
390 self.tokens.push(Token::new(kind, text, line, column));
391 }
392}
393
/// Outcome of a call to `parse`: the token stream on success, or a list
/// of error messages on failure (never both populated at once by the
/// constructors below).
#[derive(Debug)]
pub struct ParseResult {
    // Tokens produced by the tokenizer; empty when `errors` is non-empty.
    pub tokens: Vec<Token>,
    // Human-readable error messages; empty on success.
    pub errors: Vec<String>,
}
400
401impl ParseResult {
402 pub fn new() -> Self {
403 Self {
404 tokens: Vec::new(),
405 errors: Vec::new(),
406 }
407 }
408
409 pub fn success(tokens: Vec<Token>) -> Self {
410 Self {
411 tokens,
412 errors: Vec::new(),
413 }
414 }
415
416 pub fn error(error: String) -> Self {
417 Self {
418 tokens: Vec::new(),
419 errors: vec![error],
420 }
421 }
422}
423
424pub fn parse(input: &str) -> ParseResult {
426 let mut parser = Parser::new(input.to_string());
427
428 match parser.tokenize() {
429 Ok(tokens) => ParseResult::success(tokens.to_vec()),
430 Err(e) => ParseResult::error(e.to_string()),
431 }
432}