/// Lexical token of the scripting language handled by this tokenizer.
///
/// Keyword variants correspond to the reserved word of the same (lowercase)
/// spelling unless noted otherwise. Variants with a `String` payload keep text
/// taken from the source line.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Token {
    // ---- Keywords ----
    /// `import`
    Import,
    /// `from`
    From,
    /// `def`
    Def,
    /// `if`
    If,
    /// `elif`
    Elif,
    /// `else`
    Else,
    /// `for`
    For,
    /// `while`
    While,
    /// `return`
    Return,
    /// `pass`
    Pass,
    /// `in`
    In,
    /// `global`
    Global,
    /// `as`
    As,
    /// `at`
    At,
    /// `asat`
    Asat,
    /// `and`
    And,
    /// `or`
    Or,
    /// `not`
    Not,
    /// `unless`
    Unless,
    /// `match`
    Match,
    /// `case`
    Case,
    /// `const`
    Const,
    /// `define`
    Define,
    /// `create`
    Create,
    /// `end`
    End,
    /// `to`
    To,
    /// `by`
    By,
    /// A lone `_` (an identifier consisting of just an underscore).
    Underscore,

    // ---- Literals ----
    /// Numeric literal kept as its source text; may start with `-`.
    Number(String),
    /// String literal body with the surrounding quotes removed. A backslash in
    /// the source is dropped and the character after it is kept verbatim.
    String(String),
    /// `True`
    True_,
    /// `False`
    False_,
    /// `None`
    None_,

    /// Identifier text. The lexer also uses this variant for `@selector[...]`
    /// targets, `~`/`^` coordinates and `namespace:name` style names.
    Ident(String),

    // ---- Punctuation and arithmetic operators ----
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `:`
    Colon,
    /// `;`
    SemiColon,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `=`
    Equals,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `^`
    Caret,

    // ---- Comparison operators ----
    /// `==`
    EqEq,
    /// `!=`
    NotEq,
    /// `<`
    Lt,
    /// `<=`
    LtEq,
    /// `>`
    Gt,
    /// `>=`
    GtEq,

    /// Raw command text that followed a `/` on a line (the `/` itself is not
    /// stored).
    MinecraftCommand(String),
    /// End of a logical line.
    Newline,
    /// Indentation increased relative to the enclosing block.
    Indent,
    /// Indentation returned to an outer block level.
    Dedent,
    /// End of the token stream.
    Eof,
}
78
79impl std::fmt::Display for Token {
80 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
81 match self {
82 Token::Ident(s) => write!(f, "{}", s),
83 Token::String(s) => write!(
84 f,
85 "{}",
86 serde_json::to_string(s).unwrap_or_else(|_| format!("\"{}\"", s))
87 ),
88 Token::Number(n) => write!(f, "{}", n),
89 Token::MinecraftCommand(s) => write!(f, "/{}", s),
90 Token::Dot => write!(f, "."),
91 Token::Colon => write!(f, ":"),
92 Token::SemiColon => write!(f, ";"),
93 Token::Comma => write!(f, ","),
94 Token::LParen => write!(f, "("),
95 Token::RParen => write!(f, ")"),
96 Token::LBracket => write!(f, "["),
97 Token::RBracket => write!(f, "]"),
98 Token::LBrace => write!(f, "{{"),
99 Token::RBrace => write!(f, "}}"),
100 Token::Plus => write!(f, "+"),
101 Token::Minus => write!(f, "-"),
102 Token::Star => write!(f, "*"),
103 Token::Slash => write!(f, "/"),
104 Token::Percent => write!(f, "%"),
105 Token::Caret => write!(f, "^"),
106 Token::Equals => write!(f, "="),
107 Token::EqEq => write!(f, "=="),
108 Token::NotEq => write!(f, "!="),
109 Token::Lt => write!(f, "<"),
110 Token::LtEq => write!(f, "<="),
111 Token::Gt => write!(f, ">"),
112 Token::GtEq => write!(f, ">="),
113 Token::If => write!(f, "if"),
115 Token::Unless => write!(f, "unless"),
116 Token::As => write!(f, "as"),
117 Token::At => write!(f, "at"),
118 Token::And => write!(f, "and"),
119 Token::Or => write!(f, "or"),
120 Token::Not => write!(f, "not"),
121 Token::In => write!(f, "in"),
122 Token::For => write!(f, "for"),
123 Token::While => write!(f, "while"),
124 Token::Elif => write!(f, "elif"),
125 Token::Else => write!(f, "else"),
126 Token::Def => write!(f, "def"),
127 Token::Return => write!(f, "return"),
128 Token::Pass => write!(f, "pass"),
129 Token::Global => write!(f, "global"),
130 Token::Import => write!(f, "import"),
131 Token::From => write!(f, "from"),
132 Token::Asat => write!(f, "asat"),
133 Token::Match => write!(f, "match"),
134 Token::Case => write!(f, "case"),
135 Token::Const => write!(f, "const"),
136 Token::Define => write!(f, "define"),
137 Token::Create => write!(f, "create"),
138 Token::End => write!(f, "end"),
139 Token::To => write!(f, "to"),
140 Token::By => write!(f, "by"),
141 Token::Underscore => write!(f, "_"),
142 Token::True_ => write!(f, "True"),
143 Token::False_ => write!(f, "False"),
144 Token::None_ => write!(f, "None"),
145 _ => write!(f, "{:?}", self),
146 }
147 }
148}
149
150pub fn tokenize(source: &str) -> Result<Vec<Token>, String> {
152 let mut tokens = Vec::new();
153 let mut indent_stack: Vec<usize> = vec![0];
154 let mut paren_depth = 0;
155
156 for (line_idx, line) in source.lines().enumerate() {
157 let trimmed = line.trim();
159 if trimmed.is_empty() || trimmed.starts_with('#') {
160 continue;
161 }
162
163 if paren_depth == 0 {
165 let indent_level = line.len() - line.trim_start().len();
167 let current_indent = *indent_stack.last().unwrap();
168
169 if indent_level > current_indent {
171 indent_stack.push(indent_level);
172 tokens.push(Token::Indent);
173 } else if indent_level < current_indent {
174 while indent_stack.len() > 1 && *indent_stack.last().unwrap() > indent_level {
175 indent_stack.pop();
176 tokens.push(Token::Dedent);
177 }
178 if *indent_stack.last().unwrap() != indent_level {
179 return Err(format!("Indentation error at line {}", line_idx + 1));
180 }
181 }
182 }
183
184 let line_content = line.trim();
186 tokenize_line(line_content, &mut tokens, &mut paren_depth)?;
187
188 if paren_depth == 0 {
190 tokens.push(Token::Newline);
191 }
192 }
193
194 while indent_stack.len() > 1 {
196 indent_stack.pop();
197 tokens.push(Token::Dedent);
198 }
199
200 tokens.push(Token::Eof);
201 Ok(tokens)
202}
203
204fn should_be_binary_minus(tokens: &[Token]) -> bool {
207 if let Some(last_token) = tokens.last() {
210 matches!(
211 last_token,
212 Token::Number(_)
213 | Token::Ident(_)
214 | Token::RParen
215 | Token::RBracket
216 | Token::True_
217 | Token::False_
218 | Token::None_
219 )
220 } else {
221 false
223 }
224}
225
226fn should_be_power_operator(tokens: &[Token]) -> bool {
229 if let Some(last_token) = tokens.last() {
232 matches!(
233 last_token,
234 Token::Number(_)
235 | Token::Ident(_)
236 | Token::RParen
237 | Token::RBracket
238 | Token::True_
239 | Token::False_
240 | Token::None_
241 )
242 } else {
243 false
244 }
245}
246
247fn tokenize_line(line: &str, tokens: &mut Vec<Token>, paren_depth: &mut i32) -> Result<(), String> {
249 let mut chars = line.chars().peekable();
250
251 while let Some(&ch) = chars.peek() {
252 match ch {
253 ' ' | '\t' => {
254 chars.next();
255 }
256 '/' => {
257 chars.next();
260 if let Some(&next_ch) = chars.peek() {
261 if next_ch.is_alphabetic() {
263 let mut cmd: String = chars.collect();
265 cmd = strip_minecraft_inline_comment(&cmd).to_string();
266 cmd = cmd.trim_end().to_string();
267
268 tokens.push(Token::MinecraftCommand(cmd));
269 break;
270 } else {
271 tokens.push(Token::Slash);
273 }
274 } else {
275 tokens.push(Token::Slash);
277 }
278 }
279 '"' | '\'' => {
280 let quote = chars.next().unwrap();
282 let mut s = String::new();
283 let mut escaped = false;
284 for ch in chars.by_ref() {
285 if escaped {
286 s.push(ch);
287 escaped = false;
288 } else if ch == '\\' {
289 escaped = true;
290 } else if ch == quote {
291 break;
292 } else {
293 s.push(ch);
294 }
295 }
296 tokens.push(Token::String(s));
297 }
298 '0'..='9' => {
299 let mut num = String::new();
301 while let Some(&ch) = chars.peek() {
302 if ch.is_ascii_digit() {
303 num.push(chars.next().unwrap());
304 } else if ch == '.' {
305 let mut temp_chars = chars.clone();
307 temp_chars.next(); if let Some(&next_ch) = temp_chars.peek() {
309 if next_ch == '.' {
310 break;
312 }
313 }
314 num.push(chars.next().unwrap());
316 } else {
317 break;
318 }
319 }
320 if num.parse::<f64>().is_err() {
322 return Err(format!(
323 "Invalid number literal: '{}' at line {}",
324 num, line
325 ));
326 }
327 tokens.push(Token::Number(num));
328 }
329 'a'..='z' | 'A'..='Z' | '_' => {
330 let mut ident = String::new();
332 while let Some(&ch) = chars.peek() {
333 if ch.is_alphanumeric() || ch == '_' {
334 ident.push(chars.next().unwrap());
335 } else if ch == ':' {
336 let mut temp_chars = chars.clone();
338 temp_chars.next(); if let Some(&next_ch) = temp_chars.peek() {
340 if next_ch.is_alphabetic() || next_ch == '_' {
341 ident.push(chars.next().unwrap()); continue;
344 }
345 }
346 break;
348 } else {
349 break;
350 }
351 }
352 let token = match ident.as_str() {
353 "import" => Token::Import,
354 "from" => Token::From,
355 "def" => Token::Def,
356 "if" => Token::If,
357 "elif" => Token::Elif,
358 "else" => Token::Else,
359 "for" => Token::For,
360 "while" => Token::While,
361 "return" => Token::Return,
362 "pass" => Token::Pass,
363 "in" => Token::In,
364 "global" => Token::Global,
365 "as" => Token::As,
366 "at" => Token::At,
367 "asat" => Token::Asat,
368 "and" => Token::And,
369 "or" => Token::Or,
370 "not" => Token::Not,
371 "unless" => Token::Unless,
372 "match" => Token::Match,
373 "case" => Token::Case,
374 "const" => Token::Const,
375 "define" => Token::Define,
376 "create" => Token::Create,
377 "end" => Token::End,
378 "to" => Token::To,
379 "by" => Token::By,
380 "_" => Token::Underscore,
381 "True" => Token::True_,
382 "False" => Token::False_,
383 "None" => Token::None_,
384 _ => Token::Ident(ident),
385 };
386 tokens.push(token);
387 }
388 '@' => {
389 let mut selector = String::new();
391 selector.push(chars.next().unwrap()); while let Some(&ch) = chars.peek() {
394 if ch.is_alphanumeric() || ch == '_' {
395 selector.push(chars.next().unwrap());
396 } else {
397 break;
398 }
399 }
400 if chars.peek() == Some(&'[') {
402 let mut bracket_depth = 0;
403 while let Some(ch) = chars.peek() {
404 selector.push(*ch);
405 if *ch == '[' {
406 bracket_depth += 1;
407 } else if *ch == ']' {
408 bracket_depth -= 1;
409 chars.next();
410 if bracket_depth == 0 {
411 break;
412 }
413 continue;
414 }
415 chars.next();
416 }
417 }
418 tokens.push(Token::Ident(selector));
419 }
420 '~' => {
421 let mut coord = String::new();
423 coord.push(chars.next().unwrap());
424 while let Some(&ch) = chars.peek() {
425 if ch.is_ascii_digit() || ch == '.' || ch == '-' {
426 coord.push(chars.next().unwrap());
427 } else {
428 break;
429 }
430 }
431 tokens.push(Token::Ident(coord));
432 }
433 '^' => {
434 chars.next();
435 if should_be_power_operator(tokens) {
438 tokens.push(Token::Caret);
440 } else if let Some(&ch) = chars.peek() {
441 if ch.is_ascii_digit() || ch == '.' || ch == '-' {
442 let mut coord = String::from("^");
444 while let Some(&ch) = chars.peek() {
445 if ch.is_ascii_digit() || ch == '.' || ch == '-' {
446 coord.push(chars.next().unwrap());
447 } else {
448 break;
449 }
450 }
451 tokens.push(Token::Ident(coord));
452 } else {
453 tokens.push(Token::Caret);
455 }
456 } else {
457 tokens.push(Token::Caret);
459 }
460 }
461 '=' => {
462 chars.next();
463 if chars.peek() == Some(&'=') {
464 chars.next();
465 tokens.push(Token::EqEq);
466 } else {
467 tokens.push(Token::Equals);
468 }
469 }
470 '!' => {
471 chars.next();
472 if chars.peek() == Some(&'=') {
473 chars.next();
474 tokens.push(Token::NotEq);
475 } else {
476 return Err("Unexpected '!' character".to_string());
477 }
478 }
479 '<' => {
480 chars.next();
481 if chars.peek() == Some(&'=') {
482 chars.next();
483 tokens.push(Token::LtEq);
484 } else {
485 tokens.push(Token::Lt);
486 }
487 }
488 '>' => {
489 chars.next();
490 if chars.peek() == Some(&'=') {
491 chars.next();
492 tokens.push(Token::GtEq);
493 } else {
494 tokens.push(Token::Gt);
495 }
496 }
497 '(' => {
498 chars.next();
499 tokens.push(Token::LParen);
500 *paren_depth += 1;
501 }
502 ')' => {
503 chars.next();
504 tokens.push(Token::RParen);
505 *paren_depth -= 1;
506 }
507 '[' => {
508 chars.next();
509 tokens.push(Token::LBracket);
510 *paren_depth += 1;
511 }
512 ']' => {
513 chars.next();
514 tokens.push(Token::RBracket);
515 *paren_depth -= 1;
516 }
517 ':' => {
518 chars.next();
519 tokens.push(Token::Colon);
520 }
521 ';' => {
522 chars.next();
523 tokens.push(Token::SemiColon);
524 }
525 ',' => {
526 chars.next();
527 tokens.push(Token::Comma);
528 }
529 '.' => {
530 chars.next();
531 tokens.push(Token::Dot);
532 }
533 '+' => {
534 chars.next();
535 tokens.push(Token::Plus);
536 }
537 '-' => {
538 chars.next();
539 if let Some(&next_ch) = chars.peek() {
541 if next_ch.is_ascii_digit() && !should_be_binary_minus(tokens) {
545 let mut num = String::from("-");
546 while let Some(&ch) = chars.peek() {
547 if ch.is_ascii_digit() {
548 num.push(chars.next().unwrap());
549 } else if ch == '.' {
550 let mut temp_chars = chars.clone();
552 temp_chars.next(); if let Some(&next_ch) = temp_chars.peek() {
554 if next_ch == '.' {
555 break;
557 }
558 }
559 num.push(chars.next().unwrap());
561 } else {
562 break;
563 }
564 }
565 if num.parse::<f64>().is_err() {
567 return Err(format!(
568 "Invalid number literal: '{}' at line {}",
569 num, line
570 ));
571 }
572 tokens.push(Token::Number(num));
573 } else {
574 tokens.push(Token::Minus);
576 }
577 } else {
578 tokens.push(Token::Minus);
579 }
580 }
581 '*' => {
582 chars.next();
583 tokens.push(Token::Star);
584 }
585 '%' => {
586 chars.next();
587 tokens.push(Token::Percent);
588 }
589 '{' => {
590 chars.next();
591 tokens.push(Token::LBrace);
592 *paren_depth += 1;
593 }
594 '}' => {
595 chars.next();
596 tokens.push(Token::RBrace);
597 *paren_depth -= 1;
598 }
599 '#' => {
600 break;
602 }
603 _ => {
604 return Err(format!("Unexpected character: {}", ch));
605 }
606 }
607 }
608
609 Ok(())
610}
611
/// Removes a trailing `#` comment from the text of a command.
///
/// A `#` starts a comment only when all of the following hold:
/// * it is not inside a `"..."` or `'...'` quoted section,
/// * it is not preceded by a backslash (a backslash skips the next character
///   everywhere, inside and outside quotes),
/// * it is the first character or follows whitespace, and
/// * it is the last character or is followed by whitespace.
///
/// When such a `#` is found, the text before it is returned with trailing
/// whitespace trimmed. Otherwise `command` is returned unchanged (trailing
/// whitespace included).
fn strip_minecraft_inline_comment(command: &str) -> &str {
    let mut open_quote: Option<char> = None;
    let mut escaped = false;
    // Character seen on the previous iteration, whatever state it was in.
    let mut previous: Option<char> = None;
    let mut cursor = command.char_indices().peekable();

    while let Some((offset, ch)) = cursor.next() {
        // Remember `ch` for the next round and fetch what came before it.
        let before = previous.replace(ch);

        if escaped {
            escaped = false;
            continue;
        }
        if ch == '\\' {
            escaped = true;
            continue;
        }

        if let Some(active) = open_quote {
            if ch == active {
                open_quote = None;
            }
            continue;
        }

        match ch {
            '"' | '\'' => open_quote = Some(ch),
            '#' => {
                let space_before = before.map_or(true, char::is_whitespace);
                let space_after = cursor
                    .peek()
                    .map_or(true, |&(_, next)| next.is_whitespace());
                if space_before && space_after {
                    return command[..offset].trim_end();
                }
            }
            _ => {}
        }
    }

    command
}