sieve/compiler/grammar/expr/
parser.rs

1/*
2 * Copyright (c) 2020-2023, Stalwart Labs Ltd.
3 *
4 * This file is part of the Stalwart Sieve Interpreter.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Affero General Public License as
8 * published by the Free Software Foundation, either version 3 of
9 * the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Affero General Public License for more details.
15 * in the LICENSE file at the top-level directory of this distribution.
16 * You should have received a copy of the GNU Affero General Public License
17 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 *
19 * You can be released from the requirements of the AGPLv3 license by
20 * purchasing a commercial license. Please contact licensing@stalw.art
21 * for more details.
22*/
23
24use super::{tokenizer::Tokenizer, BinaryOperator, Expression, Token};
25
26pub(crate) struct ExpressionParser<'x, F>
27where
28    F: Fn(&str, bool) -> Result<Token, String>,
29{
30    pub(crate) tokenizer: Tokenizer<'x, F>,
31    pub(crate) output: Vec<Expression>,
32    operator_stack: Vec<(Token, Option<usize>)>,
33    arg_count: Vec<i32>,
34}
35
36pub(crate) const ID_ARRAY_ACCESS: u32 = u32::MAX;
37pub(crate) const ID_ARRAY_BUILD: u32 = u32::MAX - 1;
38pub(crate) const ID_EXTERNAL: u32 = u32::MAX - 2;
39
40impl<'x, F> ExpressionParser<'x, F>
41where
42    F: Fn(&str, bool) -> Result<Token, String>,
43{
44    pub fn from_tokenizer(tokenizer: Tokenizer<'x, F>) -> Self {
45        Self {
46            tokenizer,
47            output: Vec::new(),
48            operator_stack: Vec::new(),
49            arg_count: Vec::new(),
50        }
51    }
52
53    pub fn parse(mut self) -> Result<Self, String> {
54        let mut last_is_var_or_fnc = false;
55
56        while let Some(token) = self.tokenizer.next()? {
57            let mut is_var_or_fnc = false;
58            match token {
59                Token::Variable(v) => {
60                    self.inc_arg_count();
61                    is_var_or_fnc = true;
62                    self.output.push(Expression::Variable(v))
63                }
64                Token::Number(n) => {
65                    self.inc_arg_count();
66                    self.output.push(Expression::Constant(n.into()))
67                }
68                Token::String(s) => {
69                    self.inc_arg_count();
70                    self.output.push(Expression::Constant(s.into()))
71                }
72                Token::UnaryOperator(uop) => {
73                    self.operator_stack.push((Token::UnaryOperator(uop), None))
74                }
75                Token::OpenParen => self.operator_stack.push((token, None)),
76                Token::CloseParen | Token::CloseBracket => {
77                    let expect_token = if matches!(token, Token::CloseParen) {
78                        Token::OpenParen
79                    } else {
80                        Token::OpenBracket
81                    };
82                    loop {
83                        match self.operator_stack.pop() {
84                            Some((t, _)) if t == expect_token => {
85                                break;
86                            }
87                            Some((Token::BinaryOperator(bop), jmp_pos)) => {
88                                self.update_jmp_pos(jmp_pos);
89                                self.output.push(Expression::BinaryOperator(bop))
90                            }
91                            Some((Token::UnaryOperator(uop), _)) => {
92                                self.output.push(Expression::UnaryOperator(uop))
93                            }
94                            _ => return Err("Mismatched parentheses".to_string()),
95                        }
96                    }
97
98                    if let Some((Token::Function { id, num_args, name }, _)) =
99                        self.operator_stack.last()
100                    {
101                        let got_args = self.arg_count.pop().unwrap();
102                        if got_args != *num_args as i32 {
103                            return Err(if *id != u32::MAX {
104                                format!(
105                                    "Expression function {:?} expected {} arguments, got {}",
106                                    name, num_args, got_args
107                                )
108                            } else {
109                                "Missing array index".to_string()
110                            });
111                        }
112
113                        let expr = match *id {
114                            ID_ARRAY_ACCESS => Expression::ArrayAccess,
115                            ID_ARRAY_BUILD => Expression::ArrayBuild(*num_args),
116                            id => Expression::Function {
117                                id,
118                                num_args: *num_args,
119                            },
120                        };
121
122                        self.operator_stack.pop();
123                        self.output.push(expr);
124                    }
125
126                    is_var_or_fnc = true;
127                }
128                Token::BinaryOperator(bop) => {
129                    self.dec_arg_count();
130                    while let Some((top_token, prev_jmp_pos)) = self.operator_stack.last() {
131                        match top_token {
132                            Token::BinaryOperator(top_bop) => {
133                                if bop.precedence() <= top_bop.precedence() {
134                                    let top_bop = *top_bop;
135                                    let jmp_pos = *prev_jmp_pos;
136                                    self.update_jmp_pos(jmp_pos);
137                                    self.operator_stack.pop();
138                                    self.output.push(Expression::BinaryOperator(top_bop));
139                                } else {
140                                    break;
141                                }
142                            }
143                            Token::UnaryOperator(top_uop) => {
144                                let top_uop = *top_uop;
145                                self.operator_stack.pop();
146                                self.output.push(Expression::UnaryOperator(top_uop));
147                            }
148                            _ => break,
149                        }
150                    }
151
152                    // Add jump instruction for short-circuiting
153                    let jmp_pos = match bop {
154                        BinaryOperator::And => {
155                            self.output.push(Expression::JmpIf { val: false, pos: 0 });
156                            Some(self.output.len() - 1)
157                        }
158                        BinaryOperator::Or => {
159                            self.output.push(Expression::JmpIf { val: true, pos: 0 });
160                            Some(self.output.len() - 1)
161                        }
162                        _ => None,
163                    };
164
165                    self.operator_stack
166                        .push((Token::BinaryOperator(bop), jmp_pos));
167                }
168                Token::Function { id, name, num_args } => {
169                    self.inc_arg_count();
170                    self.arg_count.push(0);
171                    self.operator_stack
172                        .push((Token::Function { id, name, num_args }, None))
173                }
174                Token::OpenBracket => {
175                    // Array functions
176                    let (id, num_args, arg_count) = if last_is_var_or_fnc {
177                        (ID_ARRAY_ACCESS, 2, 1)
178                    } else {
179                        self.inc_arg_count();
180                        (ID_ARRAY_BUILD, 0, 0)
181                    };
182                    self.arg_count.push(arg_count);
183                    self.operator_stack.push((
184                        Token::Function {
185                            id,
186                            name: String::from("array"),
187                            num_args,
188                        },
189                        None,
190                    ));
191                    self.operator_stack.push((token, None));
192                }
193                Token::Comma => {
194                    while let Some((token, jmp_pos)) = self.operator_stack.last() {
195                        match token {
196                            Token::OpenParen => break,
197                            Token::BinaryOperator(bop) => {
198                                let bop = *bop;
199                                let jmp_pos = *jmp_pos;
200                                self.update_jmp_pos(jmp_pos);
201                                self.output.push(Expression::BinaryOperator(bop));
202                                self.operator_stack.pop();
203                            }
204                            Token::UnaryOperator(uop) => {
205                                self.output.push(Expression::UnaryOperator(*uop));
206                                self.operator_stack.pop();
207                            }
208                            _ => break,
209                        }
210                    }
211                }
212            }
213            last_is_var_or_fnc = is_var_or_fnc;
214        }
215
216        while let Some((token, jmp_pos)) = self.operator_stack.pop() {
217            match token {
218                Token::BinaryOperator(bop) => {
219                    self.update_jmp_pos(jmp_pos);
220                    self.output.push(Expression::BinaryOperator(bop))
221                }
222                Token::UnaryOperator(uop) => self.output.push(Expression::UnaryOperator(uop)),
223                _ => return Err("Invalid token on the operator stack".to_string()),
224            }
225        }
226
227        Ok(self)
228    }
229
230    fn inc_arg_count(&mut self) {
231        if let Some(x) = self.arg_count.last_mut() {
232            *x = x.saturating_add(1);
233            let op_pos = self.operator_stack.len().saturating_sub(2);
234            match self.operator_stack.get_mut(op_pos) {
235                Some((Token::Function { num_args, id, .. }, _)) if *id == ID_ARRAY_BUILD => {
236                    *num_args += 1;
237                }
238                _ => {}
239            }
240        }
241    }
242
243    fn dec_arg_count(&mut self) {
244        if let Some(x) = self.arg_count.last_mut() {
245            *x = x.saturating_sub(1);
246        }
247    }
248
249    fn update_jmp_pos(&mut self, jmp_pos: Option<usize>) {
250        if let Some(jmp_pos) = jmp_pos {
251            let cur_pos = self.output.len();
252            if let Expression::JmpIf { pos, .. } = &mut self.output[jmp_pos] {
253                *pos = (cur_pos - jmp_pos) as u32;
254            } else {
255                #[cfg(test)]
256                panic!("Invalid jump position");
257            }
258        }
259    }
260}
261
262impl BinaryOperator {
263    fn precedence(&self) -> i32 {
264        match self {
265            BinaryOperator::Multiply | BinaryOperator::Divide => 7,
266            BinaryOperator::Add | BinaryOperator::Subtract => 6,
267            BinaryOperator::Gt | BinaryOperator::Ge | BinaryOperator::Lt | BinaryOperator::Le => 5,
268            BinaryOperator::Eq | BinaryOperator::Ne => 4,
269            BinaryOperator::Xor => 3,
270            BinaryOperator::And => 2,
271            BinaryOperator::Or => 1,
272        }
273    }
274}