cas_parser/parser/ast/
binary.rs

1use cas_error::Error;
2use crate::parser::{
3    ast::{
4        assign::{Assign as AssignExpr, AssignTarget},
5        expr::Expr,
6        range::{Range as RangeExpr, RangeKind},
7        unary::Unary,
8    },
9    error::NonFatal,
10    fmt::{Latex, fmt_pow},
11    token::{
12        op::{
13            AssignOp,
14            Associativity,
15            BinOp,
16            BinOpKind,
17            Precedence,
18        },
19        Assign,
20        RangeClosed,
21        RangeHalfOpen,
22    },
23    Parse,
24    Parser,
25    ParseResult,
26};
27use std::{fmt, ops::Range};
28
29#[cfg(feature = "serde")]
30use serde::{Deserialize, Serialize};
31
32/// A binary operator, including assignment.
33#[derive(Debug, Clone, PartialEq, Eq)]
34enum BinOpExt {
35    /// A binary operator, such as `+` or `*`.
36    Op(BinOp),
37
38    /// Implicit multiplication, such as `2x` or `x(x + 1)`.
39    ///
40    /// This is not a real operator, but it is treated as one for the purposes of parsing.
41    ImplicitMultiplication,
42
43    /// An assignment operator, such as `+=` or `/=`.
44    Assign(AssignOp),
45
46    /// A range operator, such as `..` or `..=`.
47    Range(RangeKind)
48}
49
50impl BinOpExt {
51    /// Returns the precedence of the binary operator.
52    fn precedence(&self) -> Precedence {
53        match self {
54            BinOpExt::Op(op) => op.precedence(),
55            BinOpExt::ImplicitMultiplication => Precedence::Factor,
56            BinOpExt::Assign(_) => Precedence::Assign,
57            BinOpExt::Range(_) => Precedence::Range,
58        }
59    }
60}
61
62impl From<BinOp> for BinOpExt {
63    fn from(op: BinOp) -> Self {
64        BinOpExt::Op(op)
65    }
66}
67
68impl From<AssignOp> for BinOpExt {
69    fn from(op: AssignOp) -> Self {
70        BinOpExt::Assign(op)
71    }
72}
73
74/// A binary expression, such as `1 + 2`. Binary expressions can include nested expressions.
75#[derive(Debug, Clone, PartialEq, Eq)]
76#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
77pub struct Binary {
78    /// The left-hand side of the binary expression.
79    pub lhs: Box<Expr>,
80
81    /// The operator of the binary expression.
82    pub op: BinOp,
83
84    /// The right-hand side of the binary expression.
85    pub rhs: Box<Expr>,
86
87    /// The region of the source code that this binary expression was parsed from.
88    pub span: Range<usize>,
89}
90
91impl Binary {
92    /// Returns the span of the binary expression.
93    pub fn span(&self) -> Range<usize> {
94        self.span.clone()
95    }
96
97    /// After parsing the left-hand-side, the operator, and the right-hand-side of a potential
98    /// binary expression, parse ahead to see if the right-hand-side is incomplete.
99    ///
100    /// If we are parsing the expression `1 + 2 * 3`, we will first parse the left-hand-side `1`,
101    /// then the operator `+`, then the right-hand-side `2`. However, before we build the
102    /// corresponding AST node, we should check if the operator after `2` has higher precedence
103    /// than `+` (if it exists).
104    ///
105    /// If it does, we should parse the expression starting with `2` first, so that we get `2 * 3`
106    /// as the right-hand-side to the `1 +` node. This works by calling into [`Self::parse_expr`]
107    /// again, but with `rhs` (`2` in this case) as the `lhs` argument.
108    ///
109    /// If it does not (such as in the expression `3 * 2 + 1`), we build the AST node `3 * 2`
110    /// first. Then, [`Self::parse_expr`] will pick up the `+ 1` part of the expression, and
111    /// build the AST node `3 * 2 + 1`.
112    ///
113    /// Implicit multiplication is also handled here. In an expression such as `1 + 2x y`, the
114    /// first call to [`Self::parse_expr`] will parse `1 + 2`. However, there is no operator after
115    /// `2`, so instead, we assume an implicit multiplication operator, because multiplication has
116    /// higher precedence than addition, then continue with the same procedure as if the operator
117    /// did exist.
118    ///
119    /// There is one distinction we must make when parsing implicit multiplication. Since we're
120    /// essentially creating multiplication out of thin air, [`Self::complete_rhs`] can get into an
121    /// infinite loop if we're not careful!
122    ///
123    /// Consider the expression `1 + 2x`. The first call to [`Self::complete_rhs`] will contain the
124    /// left-hand-side `1`, the operator `+`, and the right-hand-side `2`. Since there is no
125    /// operator after `2`, we assume implicit multiplication (higher precedence than `+`), and
126    /// successfully parse `2x`. At this point, we've returned to the first call to
127    /// [`Self::complete_rhs`], where the right-hand-side is now `2x`. However, there is still no
128    /// operator after `2x`, so we assume implicit multiplication again, but parse nothing;
129    /// [`Self::parse_expr`] would simply return `2x` as-is, and we would return to the first call
130    /// to [`Self::complete_rhs`] again.
131    ///
132    /// Thankfully, the solution is simple: just check if `2x` is returned as-is! If so, there is
133    /// no expression after `2x`, so we should break out of the loop and return the AST node
134    /// `1 + 2x`. This is the purpose of the `changed` boolean returned by [`Self::parse_expr`].
135    fn complete_rhs(
136        input: &mut Parser,
137        recoverable_errors: &mut Vec<Error>,
138        lhs: Expr,
139        op: BinOpExt,
140        mut rhs: Expr
141    ) -> Result<Expr, Vec<Error>> {
142        let precedence = op.precedence();
143
144        loop {
145            // before creating the `lhs op rhs` node, we should check the precedence of the
146            // following operator, if any
147            // this is because we can't parse an expression like `3 + 4 * 5`, as (3 + 4) * 5
148
149            // clone the input stream to emulate peeking
150            let mut input_ahead = input.clone();
151            if let Ok(next_op) = input_ahead.try_parse::<BinOp>().forward_errors(recoverable_errors) {
152                if next_op.precedence() > precedence || next_op.associativity() == Associativity::Right {
153                    // this operator has a higher precedence or it is right associative, so we should
154                    // parse its expression starting with `rhs` first
155                    rhs = Self::parse_expr(input, recoverable_errors, rhs, next_op.precedence())?.0;
156                } else {
157                    // this operator has lower precedence, or equal precedence and
158                    // left-associativity; this is in scenarios like:
159                    // `1 * 2 + 3` or `1 * 2 * 3`
160                    // prec(+) < prec(*), prec(*) == prec(*)
161                    //
162                    // so just break out of the loop and let `lhs` become `1 * 2`
163                    // we will parse this operator on the next iteration of the outside loop
164                    break;
165                }
166            } else if input_ahead.try_parse::<Assign>().is_ok() {
167                // assignment is right-associative, so we should parse its expression starting with
168                // `rhs` first
169                if Precedence::Assign >= precedence {
170                    rhs = Self::parse_expr(input, recoverable_errors, rhs, Precedence::Assign)?.0;
171                } else {
172                    break;
173                }
174            } else {
175                // there is no operator; check if there is a valid expression after
176                // if there is, this could be implicit multiplication
177                //
178                // first, check if the previous operator has higher or equal precedence; if so, we
179                // cannot give priority to implicit multiplication
180                if precedence >= BinOpKind::Mul.precedence() {
181                    break;
182                }
183
184                // then check if there is significant whitespace after `rhs`; if there is, we cannot
185                // parse implicit multiplication, as that would be confusing
186                input_ahead.advance_past_non_significant_whitespace();
187                if let Some(token) = input_ahead.current_token() {
188                    if token.kind.is_significant_whitespace() {
189                        break;
190                    }
191                }
192
193                let (expr, changed) = Self::parse_expr(input, recoverable_errors, rhs, BinOpKind::Mul.precedence())?;
194
195                // `rhs = expr;` must happen in all cases, even if `changed` is false, otherwise it
196                // would've been moved into `Self::parse_expr` above
197                rhs = expr;
198
199                if !changed {
200                    break;
201                }
202            }
203        }
204
205        // create the binary node representing `lhs op rhs`
206        let (start_span, end_span) = (lhs.span().start, rhs.span().end);
207        match op {
208            BinOpExt::Op(op) => Ok(Expr::Binary(Binary {
209                lhs: Box::new(lhs),
210                op,
211                rhs: Box::new(rhs),
212                span: start_span..end_span,
213            })),
214            BinOpExt::ImplicitMultiplication => {
215                let op_span = lhs.span().end..rhs.span().start;
216                Ok(Expr::Binary(Binary {
217                    lhs: Box::new(lhs),
218                    op: BinOp {
219                        kind: BinOpKind::Mul,
220                        implicit: true,
221                        span: op_span,
222                    },
223                    rhs: Box::new(rhs),
224                    span: start_span..end_span,
225                }))
226            },
227            BinOpExt::Assign(op) => Ok(Expr::Assign(AssignExpr {
228                target: AssignTarget::try_from_with_op(lhs, &op).forward_errors(recoverable_errors)?,
229                op,
230                value: Box::new(rhs),
231                span: start_span..end_span,
232            })),
233            BinOpExt::Range(kind) => Ok(Expr::Range(RangeExpr {
234                start: Box::new(lhs),
235                end: Box::new(rhs),
236                kind,
237                span: start_span..end_span,
238            })),
239        }
240    }
241
242    /// After parsing the left-hand-side of a potential binary expression, parse ahead to see if
243    /// there is a binary operator and a right-hand-side.
244    ///
245    /// See [`Self::complete_rhs`] for more information about the return value of this function.
246    pub(crate) fn parse_expr(
247        input: &mut Parser,
248        recoverable_errors: &mut Vec<Error>,
249        mut lhs: Expr,
250        precedence: Precedence
251    ) -> Result<(Expr, bool), Vec<Error>> {
252        let mut changed = false;
253        loop {
254            let mut input_ahead = input.clone();
255            if let Ok(op) = input_ahead.try_parse_then::<BinOp, _>(|bin_op, input| {
256                if bin_op.precedence() >= precedence {
257                    ParseResult::Ok(())
258                } else {
259                    ParseResult::Unrecoverable(vec![input.error(NonFatal)])
260                }
261            }).forward_errors(recoverable_errors) {
262                input.set_cursor(&input_ahead);
263                let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
264                lhs = Self::complete_rhs(input, recoverable_errors, lhs, op.into(), rhs)?;
265            } else if let Ok(assign) = input_ahead.try_parse_then::<AssignOp, _>(|_, input| {
266                if Precedence::Assign >= precedence {
267                    ParseResult::Ok(())
268                } else {
269                    ParseResult::Unrecoverable(vec![input.error(NonFatal)])
270                }
271            }).forward_errors(recoverable_errors) {
272                // assignment is also a binary expression, however it requires special handling
273                // because not all expressions are valid as the left-hand side of an assignment
274                // expression, and there is some syntax is only valid in the context of an
275                // assignment expression (i.e. function headers)
276                input.set_cursor(&input_ahead);
277                let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
278                lhs = Self::complete_rhs(input, recoverable_errors, lhs, assign.into(), rhs)?;
279            } else if input_ahead.try_parse_then::<RangeHalfOpen, _>(|_, input| {
280                if Precedence::Range >= precedence {
281                    ParseResult::Ok(())
282                } else {
283                    ParseResult::Unrecoverable(vec![input.error(NonFatal)])
284                }
285            }).is_ok() {
286                // range expressions are also binary expressions, but they require special handling
287                // because they are not valid in all contexts
288                input.set_cursor(&input_ahead);
289                let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
290                lhs = Self::complete_rhs(
291                    input,
292                    recoverable_errors,
293                    lhs,
294                    BinOpExt::Range(RangeKind::HalfOpen),
295                    rhs,
296                )?;
297            } else if input_ahead.try_parse_then::<RangeClosed, _>(|_, input| {
298                if Precedence::Range >= precedence {
299                    ParseResult::Ok(())
300                } else {
301                    ParseResult::Unrecoverable(vec![input.error(NonFatal)])
302                }
303            }).is_ok() {
304                input.set_cursor(&input_ahead);
305                let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
306                lhs = Self::complete_rhs(
307                    input,
308                    recoverable_errors,
309                    lhs,
310                    BinOpExt::Range(RangeKind::Closed),
311                    rhs,
312                )?;
313            } else if BinOpKind::Mul.precedence() >= precedence {
314                // implicit multiplication test
315
316                // do not continue if there is significant whitespace after `lhs`
317                input_ahead.advance_past_non_significant_whitespace();
318                if let Some(token) = input_ahead.current_token() {
319                    if token.kind.is_significant_whitespace() {
320                        break;
321                    }
322                }
323
324                // ensure that we get here because there is *no* operator, not because the operator
325                // has lower precedence
326                if input_ahead.try_parse_then::<BinOp, _>(|op, input| {
327                    if op.precedence() > BinOpKind::Mul.precedence() {
328                        ParseResult::Unrecoverable(vec![input.error(NonFatal)])
329                    } else {
330                        ParseResult::Ok(())
331                    }
332                }).is_ok() {
333                    break;
334                }
335
336                // if there is no expression, there is no implicit multiplication and all our
337                // attempts to parse a binary expression fail
338                let mut inner_recoverable_errors = Vec::new();
339                let Ok(rhs) = Unary::parse_or_lower(&mut input_ahead, &mut inner_recoverable_errors) else {
340                    break;
341                };
342
343                if rhs.is_implicit_mul_target() {
344                    // TODO: this can be refactored with input.try_parse_with_fn once Try trait is
345                    // stabilized, which would make ParseResult so much nicer to work with
346
347                    // add the recoverable errors from the inner parser to the outer parser, since
348                    // we know now implicit multiplication is the correct branch to take
349                    recoverable_errors.extend(inner_recoverable_errors);
350                    input.set_cursor(&input_ahead);
351                    lhs = Self::complete_rhs(
352                        input,
353                        recoverable_errors,
354                        lhs,
355                        BinOpExt::ImplicitMultiplication,
356                        rhs,
357                    )?;
358                } else {
359                    break;
360                }
361            } else {
362                break;
363            }
364
365            changed = true;
366        }
367
368        Ok((lhs, changed))
369    }
370}
371
372impl<'source> Parse<'source> for Binary {
373    fn std_parse(
374        input: &mut Parser<'source>,
375        recoverable_errors: &mut Vec<Error>
376    ) -> Result<Self, Vec<Error>> {
377        match input.try_parse().forward_errors(recoverable_errors)? {
378            Expr::Binary(binary) => Ok(binary),
379            _ => todo!(),
380        }
381    }
382}
383
384impl std::fmt::Display for Binary {
385    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
386        self.lhs.fmt(f)?;
387        self.op.fmt(f)?;
388        self.rhs.fmt(f)
389    }
390}
391
392impl Latex for Binary {
393    fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
394        match self.op.kind {
395            BinOpKind::Exp => fmt_pow(f, Some(&*self.lhs), Some(&*self.rhs)),
396            BinOpKind::Div => {
397                write!(f, "\\frac{{")?;
398                self.lhs.fmt_latex(f)?;
399                write!(f, "}}{{")?;
400                self.rhs.innermost().fmt_latex(f)?;
401                write!(f, "}}")
402            },
403            _ => {
404                self.lhs.fmt_latex(f)?;
405                self.op.fmt_latex(f)?;
406                self.rhs.fmt_latex(f)
407            },
408        }
409    }
410}