mathengine_lexer/
lib.rs

1use std::{iter::Peekable, str::Chars};
2
3pub mod error;
4pub use error::LexError;
5
6#[derive(Debug, Clone, PartialEq)]
7pub enum Token {
8    Operation(Operation),
9    Number(f64),
10    UnitValue { value: f64, unit: String },
11    Unit(String),
12    Lparen,
13    Rparen,
14}
15
/// The operators recognised by the lexer.
///
/// The enum carries no data, so it is cheap to copy and can be used as a
/// hash-map key (for example in a precedence table).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Operation {
    /// `+`
    Add,
    /// `-`
    Subtract,
    /// `/`
    Divide,
    /// `*`
    Multiply,
    /// `^`
    Power,
    /// The `to` keyword (matched case-insensitively by the lexer).
    Convert,
}
25
/// Turns an input expression into a flat list of [`Token`]s.
///
/// Build one with [`Lexer::new`] and consume it with [`Lexer::tokenize`].
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The raw text to be tokenized.
    source: String,
}
29
30impl Lexer {
31    pub fn new<S: Into<String>>(input: S) -> Self {
32        let source = input.into();
33
34        Self { source }
35    }
36
37    pub fn tokenize(self) -> Result<Vec<Token>, LexError> {
38        if self.source.trim().is_empty() {
39            return Err(LexError::EmptyInput);
40        }
41
42        let mut tokens = Vec::new();
43        let mut chars = self.source.chars().peekable();
44        let mut position = 0;
45
46        while let Some(ch) = chars.next() {
47            match ch {
48                '0'..='9' => {
49                    let start_pos = position;
50                    let num = self.lex_number(ch, &mut chars);
51                    position += num.len();
52
53                    // Skip whitespace after number
54                    while let Some(&c) = chars.peek() {
55                        if c.is_whitespace() {
56                            chars.next();
57                            position += 1;
58                        } else {
59                            break;
60                        }
61                    }
62                    // Check if there's a unit attached (with or without space)
63                    if let Some(&c) = chars.peek() {
64                        if c.is_alphabetic() {
65                            let unit = self.lex_identifier(chars.next().unwrap(), &mut chars);
66                            position += unit.len();
67                            let value =
68                                num.parse::<f64>().map_err(|_| LexError::InvalidNumber {
69                                    input: num.clone(),
70                                    position: start_pos,
71                                })?;
72                            tokens.push(Token::UnitValue { value, unit });
73                        } else {
74                            let value =
75                                num.parse::<f64>().map_err(|_| LexError::InvalidNumber {
76                                    input: num.clone(),
77                                    position: start_pos,
78                                })?;
79                            tokens.push(Token::Number(value));
80                        }
81                    } else {
82                        let value = num.parse::<f64>().map_err(|_| LexError::InvalidNumber {
83                            input: num.clone(),
84                            position: start_pos,
85                        })?;
86                        tokens.push(Token::Number(value));
87                    }
88                }
89                c if c.is_alphabetic() => {
90                    let ident = self.lex_identifier(c, &mut chars);
91                    position += ident.len();
92
93                    let tok: Token = match ident.to_lowercase().as_ref() {
94                        "to" => Token::Operation(Operation::Convert),
95                        v => Token::Unit(v.into()),
96                    };
97
98                    tokens.push(tok);
99                }
100                '+' => {
101                    tokens.push(Token::Operation(Operation::Add));
102                    position += 1;
103                }
104                '-' => {
105                    tokens.push(Token::Operation(Operation::Subtract));
106                    position += 1;
107                }
108                '*' => {
109                    tokens.push(Token::Operation(Operation::Multiply));
110                    position += 1;
111                }
112                '/' => {
113                    tokens.push(Token::Operation(Operation::Divide));
114                    position += 1;
115                }
116                '^' => {
117                    tokens.push(Token::Operation(Operation::Power));
118                    position += 1;
119                }
120                '(' => {
121                    tokens.push(Token::Lparen);
122                    position += 1;
123                }
124                ')' => {
125                    tokens.push(Token::Rparen);
126                    position += 1;
127                }
128                c if c.is_whitespace() => {
129                    position += 1;
130                    continue;
131                }
132                _ => {
133                    return Err(LexError::UnexpectedCharacter { char: ch, position });
134                }
135            }
136        }
137        Ok(tokens)
138    }
139
140    fn lex_number(&self, first_digit: char, chars: &mut Peekable<Chars<'_>>) -> String {
141        let mut s = first_digit.to_string();
142        while let Some(&next) = chars.peek() {
143            if next.is_ascii_digit() || next == '.' {
144                s.push(next);
145                chars.next();
146            } else {
147                break;
148            }
149        }
150
151        s
152    }
153
154    fn lex_identifier(&self, first_char: char, chars: &mut Peekable<Chars<'_>>) -> String {
155        let mut ident = String::new();
156        ident.push(first_char);
157
158        while let Some(&next) = chars.peek() {
159            if next.is_alphanumeric() {
160                ident.push(next);
161                chars.next();
162            } else {
163                break;
164            }
165        }
166
167        ident
168    }
169}