biodivine_lib_bdd/boolean_expression/
_impl_parser.rs

1//!
2//! Expression parsing proceeds in recursive manner in the order of operator precedence:
3//! `<=>`, `=>`, `|`, `&` and `^`. For each operator, if there is no occurrence in the root of the
4//! token tree, we forward the tree to next operator. If there is an occurrence, we split
5//! the token tree at this point. Left part goes to the next operator, right part is processed
6//! by the same operator to extract additional occurrences.
7
8use super::super::NOT_IN_VAR_NAME;
9use super::BooleanExpression;
10use super::BooleanExpression::*;
11use std::iter::Peekable;
12use std::str::Chars;
13
14/// **(internal)** Tokens that can appear in the boolean expression.
15/// The tokens form a token tree defined by parenthesis groups.
16#[derive(Debug, Eq, PartialEq)]
17enum ExprToken {
18    Not,                    // '!'
19    And,                    // '&'
20    Or,                     // '|'
21    Xor,                    // '^'
22    Imp,                    // '=>'
23    Iff,                    // '<=>'
24    Colon,                  // ':'
25    QuestionMark,           // '?'
26    Id(String),             // 'variable'
27    Tokens(Vec<ExprToken>), // A block of tokens inside parentheses
28}
29
30/// Takes a `String` and turns it into a `BooleanExpression` or `Error` if the string is not valid.
31///
32/// Syntax for the formula is described in the tutorial.
33pub fn parse_boolean_expression(from: &str) -> Result<BooleanExpression, String> {
34    let tokens = tokenize_group(&mut from.chars().peekable(), true)?;
35    Ok(*(parse_formula(&tokens)?))
36}
37
38/// **(internal)** Process a peekable iterator of characters into a vector of `ExprToken`s.
39///
40/// The outer method always consumes the opening parenthesis and the recursive call consumes the
41/// closing parenthesis. Use `top_level` to indicate that there will be no closing parenthesis.
42fn tokenize_group(data: &mut Peekable<Chars>, top_level: bool) -> Result<Vec<ExprToken>, String> {
43    let mut output = Vec::new();
44    while let Some(c) = data.next() {
45        match c {
46            c if c.is_whitespace() => { /* skip whitespace */ }
47            // single char operators
48            '!' => output.push(ExprToken::Not),
49            '&' => output.push(ExprToken::And),
50            '|' => output.push(ExprToken::Or),
51            '^' => output.push(ExprToken::Xor),
52            ':' => output.push(ExprToken::Colon),
53            '?' => output.push(ExprToken::QuestionMark),
54            '=' => {
55                if Some('>') == data.next() {
56                    output.push(ExprToken::Imp);
57                } else {
58                    return Err("Expected '>' after '='.".to_string());
59                }
60            }
61            '<' => {
62                if Some('=') == data.next() {
63                    if Some('>') == data.next() {
64                        output.push(ExprToken::Iff)
65                    } else {
66                        return Err("Expected '>' after '='.".to_string());
67                    }
68                } else {
69                    return Err("Expected '=' after '<'.".to_string());
70                }
71            }
72            // '>' is invalid as a start of a token
73            '>' => return Err("Unexpected '>'.".to_string()),
74            ')' => {
75                return if !top_level {
76                    Ok(output)
77                } else {
78                    Err("Unexpected ')'.".to_string())
79                };
80            }
81            '(' => {
82                // start a nested token group
83                let tokens = tokenize_group(data, false)?;
84                output.push(ExprToken::Tokens(tokens));
85            }
86            _ => {
87                // start of a variable name
88                let mut name = String::new();
89                name.push(c);
90                while let Some(c) = data.peek() {
91                    if c.is_whitespace() || NOT_IN_VAR_NAME.contains(c) {
92                        break;
93                    } else {
94                        name.push(*c);
95                        data.next(); // advance iterator
96                    }
97                }
98                output.push(ExprToken::Id(name));
99            }
100        }
101    }
102    if top_level {
103        Ok(output)
104    } else {
105        Err("Expected ')'.".to_string())
106    }
107}
108
109/// **(internal)** Parse a `ExprToken` tree into a `BooleanExpression` (or error if invalid).
110fn parse_formula(data: &[ExprToken]) -> Result<Box<BooleanExpression>, String> {
111    if data.len() == 1 && matches!(data[0], ExprToken::Tokens(..)) {
112        // A "fast-forward" branch for `(...)` formulas that tend to overflow the parser stack.
113        return terminal(data);
114    }
115    iff(data)
116}
117
118/// **(internal)** Utility method to find first occurrence of a specific token in the token tree.
119fn index_of_first(data: &[ExprToken], token: ExprToken) -> Option<usize> {
120    data.iter().position(|t| *t == token)
121}
122
123/// **(internal)** Recursive parsing step 1: extract `<=>` operators.
124fn iff(data: &[ExprToken]) -> Result<Box<BooleanExpression>, String> {
125    let iff_token = index_of_first(data, ExprToken::Iff);
126    if let Some(iff_token) = iff_token {
127        Ok(Box::new(Iff(
128            imp(&data[..iff_token])?,
129            iff(&data[(iff_token + 1)..])?,
130        )))
131    } else {
132        imp(data)
133    }
134}
135
136/// **(internal)** Recursive parsing step 2: extract `=>` operators.
137fn imp(data: &[ExprToken]) -> Result<Box<BooleanExpression>, String> {
138    let imp_token = index_of_first(data, ExprToken::Imp);
139    if let Some(imp_token) = imp_token {
140        Ok(Box::new(Imp(
141            cond(&data[..imp_token])?,
142            imp(&data[(imp_token + 1)..])?,
143        )))
144    } else {
145        cond(data)
146    }
147}
148
149/// **(internal)** Recursive parsing step 3: extract `cond ? then_expr : else_expr` operators.
150///
151/// + Valid: `(cond1 ? then_expr1 : else_expr1) + (cond2 ? then_expr2 : else_expr2)`
152///
153/// + Valid: `(cond1 ? then_expr1 : else_expr1) + cond2 ? then_expr2 : else_expr2`
154///
155/// + Valid: `cond1 ? then_expr1 : else_expr1 + (cond2 ? then_expr2 : else_expr2)`
156///
157/// + Invalid: `cond1 ? then_expr1 : cond2 ? then_expr2 : else_expr2`
158fn cond(data: &[ExprToken]) -> Result<Box<BooleanExpression>, String> {
159    let question_token = index_of_first(data, ExprToken::QuestionMark);
160    let colon_token = index_of_first(data, ExprToken::Colon);
161    match (question_token, colon_token) {
162        (None, None) => or(data),
163        (Some(question_token), Some(colon_token)) => Ok(Box::new(Cond(
164            or(&data[..question_token])?,
165            or(&data[(question_token + 1)..colon_token])?,
166            or(&data[(colon_token + 1)..])?,
167        ))),
168        (None, Some(_)) => Err("Expected `?` but only found `:`.".to_string()),
169        (Some(_), None) => Err("Expected `:` but only found `?`.".to_string()),
170    }
171}
172
173/// **(internal)** Recursive parsing step 4: extract `|` operators.
174fn or(data: &[ExprToken]) -> Result<Box<BooleanExpression>, String> {
175    let or_token = index_of_first(data, ExprToken::Or);
176    if let Some(or_token) = or_token {
177        Ok(Box::new(Or(
178            and(&data[..or_token])?,
179            or(&data[(or_token + 1)..])?,
180        )))
181    } else {
182        and(data)
183    }
184}
185
186/// **(internal)** Recursive parsing step 5: extract `&` operators.
187fn and(data: &[ExprToken]) -> Result<Box<BooleanExpression>, String> {
188    let and_token = index_of_first(data, ExprToken::And);
189    if let Some(and_token) = and_token {
190        Ok(Box::new(And(
191            xor(&data[..and_token])?,
192            and(&data[(and_token + 1)..])?,
193        )))
194    } else {
195        xor(data)
196    }
197}
198
199/// **(internal)** Recursive parsing step 6: extract `^` operators.
200fn xor(data: &[ExprToken]) -> Result<Box<BooleanExpression>, String> {
201    let xor_token = index_of_first(data, ExprToken::Xor);
202    if let Some(xor_token) = xor_token {
203        Ok(Box::new(Xor(
204            terminal(&data[..xor_token])?,
205            xor(&data[(xor_token + 1)..])?,
206        )))
207    } else {
208        terminal(data)
209    }
210}
211
212/// **(internal)** Recursive parsing step 7: extract terminals and negations.
213fn terminal(data: &[ExprToken]) -> Result<Box<BooleanExpression>, String> {
214    if data.is_empty() {
215        Err("Expected formula, found nothing :(".to_string())
216    } else if data[0] == ExprToken::Not {
217        Ok(Box::new(Not(terminal(&data[1..])?)))
218    } else if data.len() > 1 {
219        Err(format!(
220            "Expected variable name or (...), but found {data:?}."
221        ))
222    } else {
223        match &data[0] {
224            ExprToken::Id(name) => {
225                if name == "true" {
226                    Ok(Box::new(Const(true)))
227                } else if name == "false" {
228                    Ok(Box::new(Const(false)))
229                } else {
230                    Ok(Box::new(Variable(name.clone())))
231                }
232            }
233            ExprToken::Tokens(inner) => Ok(parse_formula(inner)?),
234            _ => unreachable!(
235                "Other tokens are matched by remaining functions, nothing else should remain."
236            ),
237        }
238    }
239}
240
241#[cfg(test)]
242mod tests {
243    use super::*;
244
245    #[test]
246    fn parse_boolean_formula_basic() {
247        let inputs = vec![
248            "v_1+{14}",                       // just a variable name with fancy symbols
249            "!v_1",                           // negation
250            "true",                           // true
251            "false",                          // false
252            "(v_1 & v_2)",                    // and
253            "(cond ? then_expr : else_expr)", // cond
254            "(v_1 | v_2)",                    // or
255            "(v_1 ^ v_2)",                    // xor
256            "(v_1 => v_2)",                   // imp
257            "(v_1 <=> v_2)",                  // iff
258        ];
259        for input in inputs {
260            assert_eq!(
261                input,
262                format!("{}", parse_boolean_expression(input).unwrap())
263            );
264        }
265    }
266
267    #[test]
268    fn parse_boolean_formula_operator_priority() {
269        assert_eq!(
270            "(((((!a ^ !b) & !c) | !d) => !e) <=> !f)",
271            format!(
272                "{}",
273                parse_boolean_expression("!a ^ !b & !c | !d => !e <=> !f").unwrap()
274            )
275        )
276    }
277
278    #[test]
279    fn parse_boolean_formula_operator_associativity() {
280        assert_eq!(
281            "(a & (b & c))",
282            format!("{}", parse_boolean_expression("a & b & c").unwrap())
283        );
284        assert_eq!(
285            "(a | (b | c))",
286            format!("{}", parse_boolean_expression("a | b | c").unwrap())
287        );
288        assert_eq!(
289            "(a ^ (b ^ c))",
290            format!("{}", parse_boolean_expression("a ^ b ^ c").unwrap())
291        );
292        assert_eq!(
293            "(a => (b => c))",
294            format!("{}", parse_boolean_expression("a => b => c").unwrap())
295        );
296        assert_eq!(
297            "(a <=> (b <=> c))",
298            format!("{}", parse_boolean_expression("a <=> b <=> c").unwrap())
299        );
300        assert_eq!(
301            "((a ? b : c) ? d : e)",
302            format!(
303                "{}",
304                parse_boolean_expression("(a ? b : c) ? d : e").unwrap()
305            )
306        );
307        assert_eq!(
308            "(a ? b : (c ? d : e))",
309            format!(
310                "{}",
311                parse_boolean_expression("a ? b : (c ? d : e)").unwrap()
312            )
313        );
314    }
315
316    #[test]
317    fn parse_boolean_formula_complex() {
318        assert_eq!(
319            "((a & !!b) => (!(t | (!!a & b)) <=> (x ^ y)))",
320            format!(
321                "{}",
322                parse_boolean_expression("a &!(!b)   => (!(t | !!a&b) <=> x^y)").unwrap()
323            )
324        )
325    }
326
327    #[test]
328    #[should_panic]
329    fn parse_boolean_formula_invalid_token_1() {
330        parse_boolean_expression("a = b").unwrap();
331    }
332
333    #[test]
334    #[should_panic]
335    fn parse_boolean_formula_invalid_token_2() {
336        parse_boolean_expression("a < b").unwrap();
337    }
338
339    #[test]
340    #[should_panic]
341    fn parse_boolean_formula_invalid_token_3() {
342        parse_boolean_expression("a <= b").unwrap();
343    }
344
345    #[test]
346    #[should_panic]
347    fn parse_boolean_formula_invalid_token_4() {
348        parse_boolean_expression("a > b").unwrap();
349    }
350
351    #[test]
352    #[should_panic]
353    fn parse_boolean_formula_invalid_parentheses_1() {
354        parse_boolean_expression("(a").unwrap();
355    }
356
357    #[test]
358    #[should_panic]
359    fn parse_boolean_formula_invalid_parentheses_2() {
360        parse_boolean_expression("b)").unwrap();
361    }
362
363    #[test]
364    #[should_panic]
365    fn parse_boolean_formula_invalid_parentheses_3() {
366        parse_boolean_expression("(a & (b)").unwrap();
367    }
368
369    #[test]
370    #[should_panic]
371    fn parse_boolean_formula_invalid_parentheses_4() {
372        parse_boolean_expression("a & (b))").unwrap();
373    }
374
375    #[test]
376    #[should_panic]
377    fn parse_boolean_formula_invalid_parentheses_5() {
378        parse_boolean_expression("a ? b : c ? d : e").unwrap();
379    }
380
381    #[test]
382    #[should_panic]
383    fn parse_boolean_formula_invalid_formula_1() {
384        parse_boolean_expression("a & & b").unwrap();
385    }
386
387    #[test]
388    #[should_panic]
389    fn parse_boolean_formula_invalid_formula_2() {
390        parse_boolean_expression("a & c d & b").unwrap();
391    }
392
393    #[test]
394    #[should_panic]
395    fn parse_boolean_formula_invalid_formula_3() {
396        parse_boolean_expression("a ? b").unwrap();
397    }
398
399    #[test]
400    #[should_panic]
401    fn parse_boolean_formula_invalid_formula_4() {
402        parse_boolean_expression("a : b").unwrap();
403    }
404}