// r2rust_core/lexer.rs

//! Token module for lexical analysis.
//!
//! This module defines the tokens used in the R2Rust interpreter and provides
//! functionality for tokenizing input strings.

/// Represents the different types of tokens in the interpreter.
///
/// Tokens are comparable (`PartialEq`), printable for diagnostics (`Debug`)
/// and cloneable, so callers can keep copies while consuming a token stream.
/// `Eq`/`Hash` are intentionally not derived because [`Token::Number`] holds
/// an `f64`.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    /// An identifier, such as a variable name.
    Identifier(String),
    /// Assignment operator (`<-`).
    Assign,
    /// Addition operator (`+`).
    Plus,
    /// Subtraction operator (`-`).
    Minus,
    /// A numeric literal (e.g., integers or floating-point numbers), stored as `f64`.
    Number(f64),
    /// End of file token.
    EOF,
}

23/// Tokenizes the input string into a vector of tokens.
24///
25/// # Arguments
26/// * `input` - The input string to tokenize.
27///
28/// # Returns
29/// A vector of tokens parsed from the input.
30///
31/// # Examples
32///
33/// use crate::lexer::{Token, tokenize};
34/// let tokens = tokenize("x <- 10 + y");
35/// assert_eq!(
36///     tokens,
37///     vec![
38///         Token::Identifier("x".to_string()),
39///         Token::Assign,
40///         Token::Number(10.0),
41///         Token::Plus,
42///         Token::Identifier("y".to_string()),
43///         Token::EOF,
44///     ]
45/// );
46///
47pub fn tokenize(input: &str) -> Vec<Token> {
48    let mut tokens = Vec::new();
49    let mut chars = input.chars().peekable();
50
51    while let Some(&ch) = chars.peek() {
52        match ch {
53            // Ignore whitespace
54            ' ' | '\t' | '\n' => {
55                chars.next();
56            }
57            // Operators
58            '+' => {
59                tokens.push(Token::Plus);
60                chars.next();
61            }
62            '-' => {
63                tokens.push(Token::Minus);
64                chars.next();
65            }
66            '<' => {
67                chars.next();
68                if chars.peek() == Some(&'-') {
69                    chars.next();
70                    tokens.push(Token::Assign);
71                } else {
72                    panic!("Error: Invalid symbol '<'");
73                }
74            }
75            // Numbers
76            '0'..='9' | '.' => {
77                let mut number = String::new();
78                while let Some(&digit) = chars.peek() {
79                    if digit.is_numeric() || digit == '.' {
80                        number.push(digit);
81                        chars.next();
82                    } else {
83                        break;
84                    }
85                }
86                tokens.push(Token::Number(
87                    number.parse::<f64>().expect("Error parsing number"),
88                ));
89            }
90            // Identifiers or variables
91            'a'..='z' | 'A'..='Z' => {
92                let mut identifier = String::new();
93                while let Some(&letter) = chars.peek() {
94                    if letter.is_alphanumeric() {
95                        identifier.push(letter);
96                        chars.next();
97                    } else {
98                        break;
99                    }
100                }
101                tokens.push(Token::Identifier(identifier));
102            }
103            // Unknown symbols
104            _ => {
105                panic!("Error: Unknown symbol '{}'", ch);
106            }
107        }
108    }
109
110    tokens.push(Token::EOF); // Add EOF token
111    tokens
112}