cruncher/
lexer.rs

1use crate::error::Error;
2use crate::token::{Op, Token};
3use crate::util::FUNCTIONS;
4use std::iter::Peekable;
5use std::str::Chars;
6
7#[must_use]
8/// Check if `ident` is a valid variable name
9///
10/// # Examples
11///
12/// ```
13/// # use cruncher::is_variable;
14///
15/// assert_eq!(is_variable("__abc3"), true);
16/// assert_eq!(is_variable("34zb"), false);
17/// ```
18pub fn is_variable(ident: &str) -> bool {
19    let mut chars = ident.chars();
20    // Check first char
21    if !chars.next().map_or(false, is_variable_start) {
22        return false;
23    }
24    // Check all others
25    for c in chars {
26        if !is_variable_part(c) {
27            return false;
28        }
29    }
30    return true;
31}
32
/// A helper struct for lexing the input
///
/// It wraps a peekable iterator over the characters of the string being
/// lexed, so the next character can be inspected without consuming it.
#[derive(Debug, Clone)]
pub struct Lexer<'a> {
    /// Characters of the input that have not been consumed yet
    input: Peekable<Chars<'a>>,
}
37
38impl<'a> Lexer<'a> {
39    pub fn new(string: &str) -> Lexer {
40        Lexer {
41            input: string.chars().peekable(),
42        }
43    }
44
45    pub fn parse(&mut self) -> Result<Vec<Token>, Error> {
46        let mut output = Vec::new();
47        let mut operators = Vec::new();
48
49        'tokens: while let Some(token) = self.next_token()? {
50            match token {
51                Token::Value(ref name) if FUNCTIONS.contains_key(name) => {
52                    operators.push(token.clone());
53                }
54                Token::Value(_) => output.push(token),
55                Token::Op(o1) => {
56                    'operators: while let Some(token) = operators.last().cloned() {
57                        match token {
58                            Token::Op(o2) => {
59                                let pop_me =
60                                    o1.is_left_associative() && o1.precedence() <= o2.precedence();
61                                let pop_me = pop_me
62                                    || o1.is_right_associative()
63                                        && o1.precedence() < o2.precedence();
64                                if pop_me {
65                                    operators.pop();
66                                    output.push(Token::Op(o2));
67                                } else {
68                                    break 'operators;
69                                }
70                            }
71                            _ => break 'operators,
72                        }
73                    }
74                    operators.push(token)
75                }
76                Token::LParen => operators.push(token),
77                Token::RParen => {
78                    while let Some(token) = operators.pop() {
79                        match token {
80                            Token::LParen => {
81                                let next_is_fn =
82                                    if let Some(&Token::Value(ref name)) = operators.last() {
83                                        FUNCTIONS.contains_key(name)
84                                    } else {
85                                        false
86                                    };
87
88                                if next_is_fn {
89                                    output.push(operators.pop().expect("emtpy operator stack"));
90                                }
91                                continue 'tokens;
92                            }
93                            Token::Op(_) => output.push(token),
94                            other => panic!("Internal bug: found {:?} in operators stack", other),
95                        }
96                    }
97                    return Err(Error::ParseError("mismatched parenthesis".into()));
98                }
99            }
100        }
101
102        while let Some(token) = operators.pop() {
103            match token {
104                Token::LParen => return Err(Error::ParseError("mismatched parenthesis".into())),
105                Token::Op(_) => output.push(token),
106                other => panic!("Internal bug: found {:?} in operators stack", other),
107            }
108        }
109        Ok(output)
110    }
111
112    fn next_token(&mut self) -> Result<Option<Token>, Error> {
113        if let Some(c) = self.input.next() {
114            let token = match c {
115                ' ' | '\t' | '\n' | '\r' => return self.next_token(),
116                c if is_value_start(c) => {
117                    let mut ident = String::new();
118                    ident.push(c);
119                    'value: while let Some(&c) = self.input.peek() {
120                        if is_value_part(c) {
121                            self.input.next();
122                            ident.push(c);
123                        } else {
124                            break 'value;
125                        }
126                    }
127                    // Special case to handle numbers starting with + or -
128                    if ident == "+" {
129                        Token::Op(Op::Plus)
130                    } else if ident == "-" {
131                        Token::Op(Op::Minus)
132                    } else {
133                        Token::Value(ident)
134                    }
135                }
136                '*' => Token::Op(Op::Mul),
137                '/' => Token::Op(Op::Div),
138                '^' => Token::Op(Op::Exp),
139                '(' => Token::LParen,
140                ')' => Token::RParen,
141                other => {
142                    return Err(Error::ParseError(format!(
143                        "unexpected characted in input: {}",
144                        other
145                    )));
146                }
147            };
148            Ok(Some(token))
149        } else {
150            Ok(None)
151        }
152    }
153}
154
155/// Check if `c` can appear at the first character of a value
156fn is_value_start(c: char) -> bool {
157    c == '+' || c == '-' || c.is_digit(10) || is_variable_start(c)
158}
159
160/// Check if `c` can appear inside a value
161fn is_value_part(c: char) -> bool {
162    c == '+' || c == '-' || c == '.' || is_variable_part(c)
163}
164
/// Check if `c` can appear as the first character of a variable
///
/// Only the underscore and ASCII letters are accepted.
fn is_variable_start(c: char) -> bool {
    match c {
        '_' => true,
        _ => c.is_ascii_alphabetic(),
    }
}
169
/// Check if `c` can appear inside a variable
///
/// Accepted characters are `.`, `_`, `[`, `]` and ASCII letters or digits.
fn is_variable_part(c: char) -> bool {
    match c {
        '.' | '_' | '[' | ']' => true,
        _ => c.is_ascii_alphanumeric(),
    }
}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `predicate` answers `expected` for every char in `chars`
    fn check_all(predicate: fn(char) -> bool, chars: &[char], expected: bool) {
        for &c in chars {
            assert!(predicate(c) == expected);
        }
    }

    /// Character classes and whole names accepted or rejected as variables
    #[test]
    fn idents() {
        check_all(is_variable_start, &['c', 'Z', '_', 'f'], true);
        check_all(
            is_variable_start,
            &['3', 'à', '@', ']', '[', '.'],
            false,
        );

        check_all(
            is_variable_part,
            &['c', 'Z', '_', 'f', '3', '[', ']', '.'],
            true,
        );
        check_all(
            is_variable_part,
            &['à', '@', '{', '}', '(', ')', '?', '&', '$'],
            false,
        );

        let accepted = [
            "_______",
            "abc",
            "ab.gd_hc",
            "abc[2as]",
            "abc[",
            "abc]",
            "abc[90]",
            "ab.c[90]",
            "a__45__bc",
        ];
        assert!(accepted.iter().all(|name| is_variable(name)));

        let rejected = ["a-bc", "@bc", "6bc"];
        assert!(!rejected.iter().any(|name| is_variable(name)));
    }

    /// Character classes accepted or rejected at the start of and inside values
    #[test]
    fn values() {
        check_all(
            is_value_start,
            &['c', 'Z', '_', 'f', '3', '+', '-'],
            true,
        );
        check_all(is_value_start, &['à', '@'], false);

        check_all(
            is_value_part,
            &['c', 'Z', '_', 'f', '3', '-', '+', '.'],
            true,
        );
        check_all(is_value_part, &['à', '@'], false);
    }
}