cas_parser/parser/ast/binary.rs
1use crate::parser::{
2 ast::{
3 assign::{Assign as AssignExpr, AssignTarget},
4 expr::Expr,
5 unary::Unary,
6 },
7 error::{kind, Error},
8 fmt::{Latex, fmt_pow},
9 token::{op::{AssignOp, Associativity, BinOp, BinOpKind, Precedence}, Assign},
10 Parse,
11 Parser,
12 ParseResult,
13};
14use std::{fmt, ops::Range};
15
16#[cfg(feature = "serde")]
17use serde::{Deserialize, Serialize};
18
19/// A binary operator, including assignment.
20#[derive(Debug, Clone, PartialEq)]
21enum BinOpExt {
22 /// A binary operator, such as `+` or `*`.
23 Op(BinOp),
24
25 /// Implicit multiplication, such as `2x` or `x(x + 1)`.
26 ///
27 /// This is not a real operator, but it is treated as one for the purposes of parsing.
28 ImplicitMultiplication,
29
30 /// An assignment operator, such as `+=` or `/=`.
31 Assign(AssignOp),
32}
33
34impl BinOpExt {
35 /// Returns the precedence of the binary operator.
36 fn precedence(&self) -> Precedence {
37 match self {
38 BinOpExt::Op(op) => op.precedence(),
39 BinOpExt::ImplicitMultiplication => Precedence::Factor,
40 BinOpExt::Assign(_) => Precedence::Assign,
41 }
42 }
43}
44
45impl From<BinOp> for BinOpExt {
46 fn from(op: BinOp) -> Self {
47 BinOpExt::Op(op)
48 }
49}
50
51impl From<AssignOp> for BinOpExt {
52 fn from(op: AssignOp) -> Self {
53 BinOpExt::Assign(op)
54 }
55}
56
57/// A binary expression, such as `1 + 2`. Binary expressions can include nested expressions.
58#[derive(Debug, Clone, PartialEq)]
59#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
60pub struct Binary {
61 /// The left-hand side of the binary expression.
62 pub lhs: Box<Expr>,
63
64 /// The operator of the binary expression.
65 pub op: BinOp,
66
67 /// The right-hand side of the binary expression.
68 pub rhs: Box<Expr>,
69
70 /// The region of the source code that this binary expression was parsed from.
71 pub span: Range<usize>,
72}
73
74impl Binary {
75 /// Returns the span of the binary expression.
76 pub fn span(&self) -> Range<usize> {
77 self.span.clone()
78 }
79
80 /// After parsing the left-hand-side, the operator, and the right-hand-side of a potential
81 /// binary expression, parse ahead to see if the right-hand-side is incomplete.
82 ///
83 /// If we are parsing the expression `1 + 2 * 3`, we will first parse the left-hand-side `1`,
84 /// then the operator `+`, then the right-hand-side `2`. However, before we build the
85 /// corresponding AST node, we should check if the operator after `2` has higher precedence
86 /// than `+` (if it exists).
87 ///
88 /// If it does, we should parse the expression starting with `2` first, so that we get `2 * 3`
89 /// as the right-hand-side to the `1 +` node. This works by calling into [`Self::parse_expr`]
90 /// again, but with `rhs` (`2` in this case) as the `lhs` argument.
91 ///
92 /// If it does not (such as in the expression `3 * 2 + 1`), we build the AST node `3 * 2`
93 /// first. Then, [`Self::parse_expr`] will pick up the `+ 1` part of the expression, and
94 /// build the AST node `3 * 2 + 1`.
95 ///
96 /// Implicit multiplication is also handled here. In an expression such as `1 + 2x y`, the
97 /// first call to [`Self::parse_expr`] will parse `1 + 2`. However, there is no operator after
98 /// `2`, so instead, we assume an implicit multiplication operator, because multiplication has
99 /// higher precedence than addition, then continue with the same procedure as if the operator
100 /// did exist.
101 ///
102 /// There is one distinction we must make when parsing implicit multiplication. Since we're
103 /// essentially creating multiplication out of thin air, [`Self::complete_rhs`] can get into an
104 /// infinite loop if we're not careful!
105 ///
106 /// Consider the expression `1 + 2x`. The first call to [`Self::complete_rhs`] will contain the
107 /// left-hand-side `1`, the operator `+`, and the right-hand-side `2`. Since there is no
108 /// operator after `2`, we assume implicit multiplication (higher precedence than `+`), and
109 /// successfully parse `2x`. At this point, we've returned to the first call to
110 /// [`Self::complete_rhs`], where the right-hand-side is now `2x`. However, there is still no
111 /// operator after `2x`, so we assume implicit multiplication again, but parse nothing;
112 /// [`Self::parse_expr`] would simply return `2x` as-is, and we would return to the first call
113 /// to [`Self::complete_rhs`] again.
114 ///
115 /// Thankfully, the solution is simple: just check if `2x` is returned as-is! If so, there is
116 /// no expression after `2x`, so we should break out of the loop and return the AST node
117 /// `1 + 2x`. This is the purpose of the `changed` boolean returned by [`Self::parse_expr`].
118 fn complete_rhs(
119 input: &mut Parser,
120 recoverable_errors: &mut Vec<Error>,
121 lhs: Expr,
122 op: BinOpExt,
123 mut rhs: Expr
124 ) -> Result<Expr, Vec<Error>> {
125 let precedence = op.precedence();
126
127 loop {
128 // before creating the `lhs op rhs` node, we should check the precedence of the
129 // following operator, if any
130 // this is because we can't parse an expression like `3 + 4 * 5`, as (3 + 4) * 5
131
132 // clone the input stream to emulate peeking
133 let mut input_ahead = input.clone();
134 if let Ok(next_op) = input_ahead.try_parse::<BinOp>().forward_errors(recoverable_errors) {
135 if next_op.precedence() > precedence || next_op.associativity() == Associativity::Right {
136 // this operator has a higher precedence or it is right associative, so we should
137 // parse its expression starting with `rhs` first
138 rhs = Self::parse_expr(input, recoverable_errors, rhs, next_op.precedence())?.0;
139 } else {
140 // this operator has lower precedence, or equal precedence and
141 // left-associativity; this is in scenarios like:
142 // `1 * 2 + 3` or `1 * 2 * 3`
143 // prec(+) < prec(*), prec(*) == prec(*)
144 //
145 // so just break out of the loop and let `lhs` become `1 * 2`
146 // we will parse this operator on the next iteration of the outside loop
147 break;
148 }
149 } else if input_ahead.try_parse::<Assign>().is_ok() {
150 // assignment is right-associative, so we should parse its expression starting with
151 // `rhs` first
152 if Precedence::Assign >= precedence {
153 rhs = Self::parse_expr(input, recoverable_errors, rhs, Precedence::Assign)?.0;
154 } else {
155 break;
156 }
157 } else {
158 // there is no operator; check if there is a valid expression after
159 // if there is, this could be implicit multiplication
160 //
161 // first, check if the previous operator has higher or equal precedence; if so, we
162 // cannot give priority to implicit multiplication
163 if precedence >= BinOpKind::Mul.precedence() {
164 break;
165 }
166
167 // then check if there is significant whitespace after `rhs`; if there is, we cannot
168 // parse implicit multiplication, as that would be confusing
169 input_ahead.advance_past_non_significant_whitespace();
170 if let Some(token) = input_ahead.current_token() {
171 if token.kind.is_significant_whitespace() {
172 break;
173 }
174 }
175
176 let (expr, changed) = Self::parse_expr(input, recoverable_errors, rhs, BinOpKind::Mul.precedence())?;
177
178 // `rhs = expr;` must happen in all cases, even if `changed` is false, otherwise it
179 // would've been moved into `Self::parse_expr` above
180 rhs = expr;
181
182 if !changed {
183 break;
184 }
185 }
186 }
187
188 // create the binary node representing `lhs op rhs`
189 let (start_span, end_span) = (lhs.span().start, rhs.span().end);
190 match op {
191 BinOpExt::Op(op) => Ok(Expr::Binary(Binary {
192 lhs: Box::new(lhs),
193 op,
194 rhs: Box::new(rhs),
195 span: start_span..end_span,
196 })),
197 BinOpExt::ImplicitMultiplication => {
198 let op_span = lhs.span().end..rhs.span().start;
199 Ok(Expr::Binary(Binary {
200 lhs: Box::new(lhs),
201 op: BinOp {
202 kind: BinOpKind::Mul,
203 implicit: true,
204 span: op_span,
205 },
206 rhs: Box::new(rhs),
207 span: start_span..end_span,
208 }))
209 },
210 BinOpExt::Assign(op) => Ok(Expr::Assign(AssignExpr {
211 target: AssignTarget::try_from_with_op(lhs, &op).forward_errors(recoverable_errors)?,
212 op,
213 value: Box::new(rhs),
214 span: start_span..end_span,
215 })),
216 }
217 }
218
219 /// After parsing the left-hand-side of a potential binary expression, parse ahead to see if
220 /// there is a binary operator and a right-hand-side.
221 ///
222 /// See [`Self::complete_rhs`] for more information about the return value of this function.
223 pub(crate) fn parse_expr(
224 input: &mut Parser,
225 recoverable_errors: &mut Vec<Error>,
226 mut lhs: Expr,
227 precedence: Precedence
228 ) -> Result<(Expr, bool), Vec<Error>> {
229 let mut changed = false;
230 loop {
231 let mut input_ahead = input.clone();
232 if let Ok(op) = input_ahead.try_parse_then::<BinOp, _>(|bin_op, input| {
233 if bin_op.precedence() >= precedence {
234 ParseResult::Ok(())
235 } else {
236 ParseResult::Unrecoverable(vec![input.error(kind::NonFatal)])
237 }
238 }).forward_errors(recoverable_errors) {
239 input.set_cursor(&input_ahead);
240 let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
241 lhs = Self::complete_rhs(input, recoverable_errors, lhs, op.into(), rhs)?;
242 } else if let Ok(assign) = input_ahead.try_parse_then::<AssignOp, _>(|_, input| {
243 if Precedence::Assign >= precedence {
244 ParseResult::Ok(())
245 } else {
246 ParseResult::Unrecoverable(vec![input.error(kind::NonFatal)])
247 }
248 }).forward_errors(recoverable_errors) {
249 // assignment is also a binary expression, however it requires special handling
250 // because not all expressions are valid as the left-hand side of an assignment
251 // expression, and there is some syntax is only valid in the context of an
252 // assignment expression (i.e. function headers)
253 input.set_cursor(&input_ahead);
254 let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
255 lhs = Self::complete_rhs(input, recoverable_errors, lhs, assign.into(), rhs)?;
256 } else if BinOpKind::Mul.precedence() >= precedence {
257 // implicit multiplication test
258
259 // do not continue if there is significant whitespace after `lhs`
260 input_ahead.advance_past_non_significant_whitespace();
261 if let Some(token) = input_ahead.current_token() {
262 if token.kind.is_significant_whitespace() {
263 break;
264 }
265 }
266
267 // ensure that we get here because there is *no* operator, not because the operator
268 // has lower precedence
269 if input_ahead.try_parse_then::<BinOp, _>(|op, input| {
270 if op.precedence() > BinOpKind::Mul.precedence() {
271 ParseResult::Unrecoverable(vec![input.error(kind::NonFatal)])
272 } else {
273 ParseResult::Ok(())
274 }
275 }).is_ok() {
276 break;
277 }
278
279 // if there is no expression, there is no implicit multiplication and all our
280 // attempts to parse a binary expression fail
281 let Ok(rhs) = Unary::parse_or_lower(input, recoverable_errors) else {
282 break;
283 };
284 lhs = Self::complete_rhs(input, recoverable_errors, lhs, BinOpExt::ImplicitMultiplication, rhs)?;
285 } else {
286 break;
287 }
288
289 changed = true;
290 }
291
292 Ok((lhs, changed))
293 }
294}
295
296impl<'source> Parse<'source> for Binary {
297 fn std_parse(
298 input: &mut Parser<'source>,
299 recoverable_errors: &mut Vec<Error>
300 ) -> Result<Self, Vec<Error>> {
301 match input.try_parse().forward_errors(recoverable_errors)? {
302 Expr::Binary(binary) => Ok(binary),
303 _ => todo!(),
304 }
305 }
306}
307
308impl std::fmt::Display for Binary {
309 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
310 self.lhs.fmt(f)?;
311 self.op.fmt(f)?;
312 self.rhs.fmt(f)
313 }
314}
315
316impl Latex for Binary {
317 fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
318 match self.op.kind {
319 BinOpKind::Exp => fmt_pow(f, Some(&*self.lhs), Some(&*self.rhs)),
320 BinOpKind::Div => {
321 write!(f, "\\frac{{")?;
322 self.lhs.fmt_latex(f)?;
323 write!(f, "}}{{")?;
324 self.rhs.innermost().fmt_latex(f)?;
325 write!(f, "}}")
326 },
327 _ => {
328 self.lhs.fmt_latex(f)?;
329 self.op.fmt_latex(f)?;
330 self.rhs.fmt_latex(f)
331 },
332 }
333 }
334}