cas_parser/parser/ast/
binary.rs

1use crate::parser::{
2    ast::{
3        assign::{Assign as AssignExpr, AssignTarget},
4        expr::Expr,
5        unary::Unary,
6    },
7    error::{kind, Error},
8    fmt::{Latex, fmt_pow},
9    token::{op::{AssignOp, Associativity, BinOp, BinOpKind, Precedence}, Assign},
10    Parse,
11    Parser,
12    ParseResult,
13};
14use std::{fmt, ops::Range};
15
16#[cfg(feature = "serde")]
17use serde::{Deserialize, Serialize};
18
19/// A binary operator, including assignment.
20#[derive(Debug, Clone, PartialEq)]
21enum BinOpExt {
22    /// A binary operator, such as `+` or `*`.
23    Op(BinOp),
24
25    /// Implicit multiplication, such as `2x` or `x(x + 1)`.
26    ///
27    /// This is not a real operator, but it is treated as one for the purposes of parsing.
28    ImplicitMultiplication,
29
30    /// An assignment operator, such as `+=` or `/=`.
31    Assign(AssignOp),
32}
33
34impl BinOpExt {
35    /// Returns the precedence of the binary operator.
36    fn precedence(&self) -> Precedence {
37        match self {
38            BinOpExt::Op(op) => op.precedence(),
39            BinOpExt::ImplicitMultiplication => Precedence::Factor,
40            BinOpExt::Assign(_) => Precedence::Assign,
41        }
42    }
43}
44
45impl From<BinOp> for BinOpExt {
46    fn from(op: BinOp) -> Self {
47        BinOpExt::Op(op)
48    }
49}
50
51impl From<AssignOp> for BinOpExt {
52    fn from(op: AssignOp) -> Self {
53        BinOpExt::Assign(op)
54    }
55}
56
57/// A binary expression, such as `1 + 2`. Binary expressions can include nested expressions.
58#[derive(Debug, Clone, PartialEq)]
59#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
60pub struct Binary {
61    /// The left-hand side of the binary expression.
62    pub lhs: Box<Expr>,
63
64    /// The operator of the binary expression.
65    pub op: BinOp,
66
67    /// The right-hand side of the binary expression.
68    pub rhs: Box<Expr>,
69
70    /// The region of the source code that this binary expression was parsed from.
71    pub span: Range<usize>,
72}
73
74impl Binary {
75    /// Returns the span of the binary expression.
76    pub fn span(&self) -> Range<usize> {
77        self.span.clone()
78    }
79
80    /// After parsing the left-hand-side, the operator, and the right-hand-side of a potential
81    /// binary expression, parse ahead to see if the right-hand-side is incomplete.
82    ///
83    /// If we are parsing the expression `1 + 2 * 3`, we will first parse the left-hand-side `1`,
84    /// then the operator `+`, then the right-hand-side `2`. However, before we build the
85    /// corresponding AST node, we should check if the operator after `2` has higher precedence
86    /// than `+` (if it exists).
87    ///
88    /// If it does, we should parse the expression starting with `2` first, so that we get `2 * 3`
89    /// as the right-hand-side to the `1 +` node. This works by calling into [`Self::parse_expr`]
90    /// again, but with `rhs` (`2` in this case) as the `lhs` argument.
91    ///
92    /// If it does not (such as in the expression `3 * 2 + 1`), we build the AST node `3 * 2`
93    /// first. Then, [`Self::parse_expr`] will pick up the `+ 1` part of the expression, and
94    /// build the AST node `3 * 2 + 1`.
95    ///
96    /// Implicit multiplication is also handled here. In an expression such as `1 + 2x y`, the
97    /// first call to [`Self::parse_expr`] will parse `1 + 2`. However, there is no operator after
98    /// `2`, so instead, we assume an implicit multiplication operator, because multiplication has
99    /// higher precedence than addition, then continue with the same procedure as if the operator
100    /// did exist.
101    ///
102    /// There is one distinction we must make when parsing implicit multiplication. Since we're
103    /// essentially creating multiplication out of thin air, [`Self::complete_rhs`] can get into an
104    /// infinite loop if we're not careful!
105    ///
106    /// Consider the expression `1 + 2x`. The first call to [`Self::complete_rhs`] will contain the
107    /// left-hand-side `1`, the operator `+`, and the right-hand-side `2`. Since there is no
108    /// operator after `2`, we assume implicit multiplication (higher precedence than `+`), and
109    /// successfully parse `2x`. At this point, we've returned to the first call to
110    /// [`Self::complete_rhs`], where the right-hand-side is now `2x`. However, there is still no
111    /// operator after `2x`, so we assume implicit multiplication again, but parse nothing;
112    /// [`Self::parse_expr`] would simply return `2x` as-is, and we would return to the first call
113    /// to [`Self::complete_rhs`] again.
114    ///
115    /// Thankfully, the solution is simple: just check if `2x` is returned as-is! If so, there is
116    /// no expression after `2x`, so we should break out of the loop and return the AST node
117    /// `1 + 2x`. This is the purpose of the `changed` boolean returned by [`Self::parse_expr`].
118    fn complete_rhs(
119        input: &mut Parser,
120        recoverable_errors: &mut Vec<Error>,
121        lhs: Expr,
122        op: BinOpExt,
123        mut rhs: Expr
124    ) -> Result<Expr, Vec<Error>> {
125        let precedence = op.precedence();
126
127        loop {
128            // before creating the `lhs op rhs` node, we should check the precedence of the
129            // following operator, if any
130            // this is because we can't parse an expression like `3 + 4 * 5`, as (3 + 4) * 5
131
132            // clone the input stream to emulate peeking
133            let mut input_ahead = input.clone();
134            if let Ok(next_op) = input_ahead.try_parse::<BinOp>().forward_errors(recoverable_errors) {
135                if next_op.precedence() > precedence || next_op.associativity() == Associativity::Right {
136                    // this operator has a higher precedence or it is right associative, so we should
137                    // parse its expression starting with `rhs` first
138                    rhs = Self::parse_expr(input, recoverable_errors, rhs, next_op.precedence())?.0;
139                } else {
140                    // this operator has lower precedence, or equal precedence and
141                    // left-associativity; this is in scenarios like:
142                    // `1 * 2 + 3` or `1 * 2 * 3`
143                    // prec(+) < prec(*), prec(*) == prec(*)
144                    //
145                    // so just break out of the loop and let `lhs` become `1 * 2`
146                    // we will parse this operator on the next iteration of the outside loop
147                    break;
148                }
149            } else if input_ahead.try_parse::<Assign>().is_ok() {
150                // assignment is right-associative, so we should parse its expression starting with
151                // `rhs` first
152                if Precedence::Assign >= precedence {
153                    rhs = Self::parse_expr(input, recoverable_errors, rhs, Precedence::Assign)?.0;
154                } else {
155                    break;
156                }
157            } else {
158                // there is no operator; check if there is a valid expression after
159                // if there is, this could be implicit multiplication
160                //
161                // first, check if the previous operator has higher or equal precedence; if so, we
162                // cannot give priority to implicit multiplication
163                if precedence >= BinOpKind::Mul.precedence() {
164                    break;
165                }
166
167                // then check if there is significant whitespace after `rhs`; if there is, we cannot
168                // parse implicit multiplication, as that would be confusing
169                input_ahead.advance_past_non_significant_whitespace();
170                if let Some(token) = input_ahead.current_token() {
171                    if token.kind.is_significant_whitespace() {
172                        break;
173                    }
174                }
175
176                let (expr, changed) = Self::parse_expr(input, recoverable_errors, rhs, BinOpKind::Mul.precedence())?;
177
178                // `rhs = expr;` must happen in all cases, even if `changed` is false, otherwise it
179                // would've been moved into `Self::parse_expr` above
180                rhs = expr;
181
182                if !changed {
183                    break;
184                }
185            }
186        }
187
188        // create the binary node representing `lhs op rhs`
189        let (start_span, end_span) = (lhs.span().start, rhs.span().end);
190        match op {
191            BinOpExt::Op(op) => Ok(Expr::Binary(Binary {
192                lhs: Box::new(lhs),
193                op,
194                rhs: Box::new(rhs),
195                span: start_span..end_span,
196            })),
197            BinOpExt::ImplicitMultiplication => {
198                let op_span = lhs.span().end..rhs.span().start;
199                Ok(Expr::Binary(Binary {
200                    lhs: Box::new(lhs),
201                    op: BinOp {
202                        kind: BinOpKind::Mul,
203                        implicit: true,
204                        span: op_span,
205                    },
206                    rhs: Box::new(rhs),
207                    span: start_span..end_span,
208                }))
209            },
210            BinOpExt::Assign(op) => Ok(Expr::Assign(AssignExpr {
211                target: AssignTarget::try_from_with_op(lhs, &op).forward_errors(recoverable_errors)?,
212                op,
213                value: Box::new(rhs),
214                span: start_span..end_span,
215            })),
216        }
217    }
218
219    /// After parsing the left-hand-side of a potential binary expression, parse ahead to see if
220    /// there is a binary operator and a right-hand-side.
221    ///
222    /// See [`Self::complete_rhs`] for more information about the return value of this function.
223    pub(crate) fn parse_expr(
224        input: &mut Parser,
225        recoverable_errors: &mut Vec<Error>,
226        mut lhs: Expr,
227        precedence: Precedence
228    ) -> Result<(Expr, bool), Vec<Error>> {
229        let mut changed = false;
230        loop {
231            let mut input_ahead = input.clone();
232            if let Ok(op) = input_ahead.try_parse_then::<BinOp, _>(|bin_op, input| {
233                if bin_op.precedence() >= precedence {
234                    ParseResult::Ok(())
235                } else {
236                    ParseResult::Unrecoverable(vec![input.error(kind::NonFatal)])
237                }
238            }).forward_errors(recoverable_errors) {
239                input.set_cursor(&input_ahead);
240                let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
241                lhs = Self::complete_rhs(input, recoverable_errors, lhs, op.into(), rhs)?;
242            } else if let Ok(assign) = input_ahead.try_parse_then::<AssignOp, _>(|_, input| {
243                if Precedence::Assign >= precedence {
244                    ParseResult::Ok(())
245                } else {
246                    ParseResult::Unrecoverable(vec![input.error(kind::NonFatal)])
247                }
248            }).forward_errors(recoverable_errors) {
249                // assignment is also a binary expression, however it requires special handling
250                // because not all expressions are valid as the left-hand side of an assignment
251                // expression, and there is some syntax is only valid in the context of an
252                // assignment expression (i.e. function headers)
253                input.set_cursor(&input_ahead);
254                let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
255                lhs = Self::complete_rhs(input, recoverable_errors, lhs, assign.into(), rhs)?;
256            } else if BinOpKind::Mul.precedence() >= precedence {
257                // implicit multiplication test
258
259                // do not continue if there is significant whitespace after `lhs`
260                input_ahead.advance_past_non_significant_whitespace();
261                if let Some(token) = input_ahead.current_token() {
262                    if token.kind.is_significant_whitespace() {
263                        break;
264                    }
265                }
266
267                // ensure that we get here because there is *no* operator, not because the operator
268                // has lower precedence
269                if input_ahead.try_parse_then::<BinOp, _>(|op, input| {
270                    if op.precedence() > BinOpKind::Mul.precedence() {
271                        ParseResult::Unrecoverable(vec![input.error(kind::NonFatal)])
272                    } else {
273                        ParseResult::Ok(())
274                    }
275                }).is_ok() {
276                    break;
277                }
278
279                // if there is no expression, there is no implicit multiplication and all our
280                // attempts to parse a binary expression fail
281                let Ok(rhs) = Unary::parse_or_lower(input, recoverable_errors) else {
282                    break;
283                };
284                lhs = Self::complete_rhs(input, recoverable_errors, lhs, BinOpExt::ImplicitMultiplication, rhs)?;
285            } else {
286                break;
287            }
288
289            changed = true;
290        }
291
292        Ok((lhs, changed))
293    }
294}
295
296impl<'source> Parse<'source> for Binary {
297    fn std_parse(
298        input: &mut Parser<'source>,
299        recoverable_errors: &mut Vec<Error>
300    ) -> Result<Self, Vec<Error>> {
301        match input.try_parse().forward_errors(recoverable_errors)? {
302            Expr::Binary(binary) => Ok(binary),
303            _ => todo!(),
304        }
305    }
306}
307
308impl std::fmt::Display for Binary {
309    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310        self.lhs.fmt(f)?;
311        self.op.fmt(f)?;
312        self.rhs.fmt(f)
313    }
314}
315
316impl Latex for Binary {
317    fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
318        match self.op.kind {
319            BinOpKind::Exp => fmt_pow(f, Some(&*self.lhs), Some(&*self.rhs)),
320            BinOpKind::Div => {
321                write!(f, "\\frac{{")?;
322                self.lhs.fmt_latex(f)?;
323                write!(f, "}}{{")?;
324                self.rhs.innermost().fmt_latex(f)?;
325                write!(f, "}}")
326            },
327            _ => {
328                self.lhs.fmt_latex(f)?;
329                self.op.fmt_latex(f)?;
330                self.rhs.fmt_latex(f)
331            },
332        }
333    }
334}