cas_parser/parser/ast/binary.rs
1use cas_error::Error;
2use crate::parser::{
3 ast::{
4 assign::{Assign as AssignExpr, AssignTarget},
5 expr::Expr,
6 range::{Range as RangeExpr, RangeKind},
7 unary::Unary,
8 },
9 error::NonFatal,
10 fmt::{Latex, fmt_pow},
11 token::{
12 op::{
13 AssignOp,
14 Associativity,
15 BinOp,
16 BinOpKind,
17 Precedence,
18 },
19 Assign,
20 RangeClosed,
21 RangeHalfOpen,
22 },
23 Parse,
24 Parser,
25 ParseResult,
26};
27use std::{fmt, ops::Range};
28
29#[cfg(feature = "serde")]
30use serde::{Deserialize, Serialize};
31
32/// A binary operator, including assignment.
33#[derive(Debug, Clone, PartialEq, Eq)]
34enum BinOpExt {
35 /// A binary operator, such as `+` or `*`.
36 Op(BinOp),
37
38 /// Implicit multiplication, such as `2x` or `x(x + 1)`.
39 ///
40 /// This is not a real operator, but it is treated as one for the purposes of parsing.
41 ImplicitMultiplication,
42
43 /// An assignment operator, such as `+=` or `/=`.
44 Assign(AssignOp),
45
46 /// A range operator, such as `..` or `..=`.
47 Range(RangeKind)
48}
49
50impl BinOpExt {
51 /// Returns the precedence of the binary operator.
52 fn precedence(&self) -> Precedence {
53 match self {
54 BinOpExt::Op(op) => op.precedence(),
55 BinOpExt::ImplicitMultiplication => Precedence::Factor,
56 BinOpExt::Assign(_) => Precedence::Assign,
57 BinOpExt::Range(_) => Precedence::Range,
58 }
59 }
60}
61
62impl From<BinOp> for BinOpExt {
63 fn from(op: BinOp) -> Self {
64 BinOpExt::Op(op)
65 }
66}
67
68impl From<AssignOp> for BinOpExt {
69 fn from(op: AssignOp) -> Self {
70 BinOpExt::Assign(op)
71 }
72}
73
74/// A binary expression, such as `1 + 2`. Binary expressions can include nested expressions.
75#[derive(Debug, Clone, PartialEq, Eq)]
76#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
77pub struct Binary {
78 /// The left-hand side of the binary expression.
79 pub lhs: Box<Expr>,
80
81 /// The operator of the binary expression.
82 pub op: BinOp,
83
84 /// The right-hand side of the binary expression.
85 pub rhs: Box<Expr>,
86
87 /// The region of the source code that this binary expression was parsed from.
88 pub span: Range<usize>,
89}
90
91impl Binary {
92 /// Returns the span of the binary expression.
93 pub fn span(&self) -> Range<usize> {
94 self.span.clone()
95 }
96
97 /// After parsing the left-hand-side, the operator, and the right-hand-side of a potential
98 /// binary expression, parse ahead to see if the right-hand-side is incomplete.
99 ///
100 /// If we are parsing the expression `1 + 2 * 3`, we will first parse the left-hand-side `1`,
101 /// then the operator `+`, then the right-hand-side `2`. However, before we build the
102 /// corresponding AST node, we should check if the operator after `2` has higher precedence
103 /// than `+` (if it exists).
104 ///
105 /// If it does, we should parse the expression starting with `2` first, so that we get `2 * 3`
106 /// as the right-hand-side to the `1 +` node. This works by calling into [`Self::parse_expr`]
107 /// again, but with `rhs` (`2` in this case) as the `lhs` argument.
108 ///
109 /// If it does not (such as in the expression `3 * 2 + 1`), we build the AST node `3 * 2`
110 /// first. Then, [`Self::parse_expr`] will pick up the `+ 1` part of the expression, and
111 /// build the AST node `3 * 2 + 1`.
112 ///
113 /// Implicit multiplication is also handled here. In an expression such as `1 + 2x y`, the
114 /// first call to [`Self::parse_expr`] will parse `1 + 2`. However, there is no operator after
115 /// `2`, so instead, we assume an implicit multiplication operator, because multiplication has
116 /// higher precedence than addition, then continue with the same procedure as if the operator
117 /// did exist.
118 ///
119 /// There is one distinction we must make when parsing implicit multiplication. Since we're
120 /// essentially creating multiplication out of thin air, [`Self::complete_rhs`] can get into an
121 /// infinite loop if we're not careful!
122 ///
123 /// Consider the expression `1 + 2x`. The first call to [`Self::complete_rhs`] will contain the
124 /// left-hand-side `1`, the operator `+`, and the right-hand-side `2`. Since there is no
125 /// operator after `2`, we assume implicit multiplication (higher precedence than `+`), and
126 /// successfully parse `2x`. At this point, we've returned to the first call to
127 /// [`Self::complete_rhs`], where the right-hand-side is now `2x`. However, there is still no
128 /// operator after `2x`, so we assume implicit multiplication again, but parse nothing;
129 /// [`Self::parse_expr`] would simply return `2x` as-is, and we would return to the first call
130 /// to [`Self::complete_rhs`] again.
131 ///
132 /// Thankfully, the solution is simple: just check if `2x` is returned as-is! If so, there is
133 /// no expression after `2x`, so we should break out of the loop and return the AST node
134 /// `1 + 2x`. This is the purpose of the `changed` boolean returned by [`Self::parse_expr`].
135 fn complete_rhs(
136 input: &mut Parser,
137 recoverable_errors: &mut Vec<Error>,
138 lhs: Expr,
139 op: BinOpExt,
140 mut rhs: Expr
141 ) -> Result<Expr, Vec<Error>> {
142 let precedence = op.precedence();
143
144 loop {
145 // before creating the `lhs op rhs` node, we should check the precedence of the
146 // following operator, if any
147 // this is because we can't parse an expression like `3 + 4 * 5`, as (3 + 4) * 5
148
149 // clone the input stream to emulate peeking
150 let mut input_ahead = input.clone();
151 if let Ok(next_op) = input_ahead.try_parse::<BinOp>().forward_errors(recoverable_errors) {
152 if next_op.precedence() > precedence || next_op.associativity() == Associativity::Right {
153 // this operator has a higher precedence or it is right associative, so we should
154 // parse its expression starting with `rhs` first
155 rhs = Self::parse_expr(input, recoverable_errors, rhs, next_op.precedence())?.0;
156 } else {
157 // this operator has lower precedence, or equal precedence and
158 // left-associativity; this is in scenarios like:
159 // `1 * 2 + 3` or `1 * 2 * 3`
160 // prec(+) < prec(*), prec(*) == prec(*)
161 //
162 // so just break out of the loop and let `lhs` become `1 * 2`
163 // we will parse this operator on the next iteration of the outside loop
164 break;
165 }
166 } else if input_ahead.try_parse::<Assign>().is_ok() {
167 // assignment is right-associative, so we should parse its expression starting with
168 // `rhs` first
169 if Precedence::Assign >= precedence {
170 rhs = Self::parse_expr(input, recoverable_errors, rhs, Precedence::Assign)?.0;
171 } else {
172 break;
173 }
174 } else {
175 // there is no operator; check if there is a valid expression after
176 // if there is, this could be implicit multiplication
177 //
178 // first, check if the previous operator has higher or equal precedence; if so, we
179 // cannot give priority to implicit multiplication
180 if precedence >= BinOpKind::Mul.precedence() {
181 break;
182 }
183
184 // then check if there is significant whitespace after `rhs`; if there is, we cannot
185 // parse implicit multiplication, as that would be confusing
186 input_ahead.advance_past_non_significant_whitespace();
187 if let Some(token) = input_ahead.current_token() {
188 if token.kind.is_significant_whitespace() {
189 break;
190 }
191 }
192
193 let (expr, changed) = Self::parse_expr(input, recoverable_errors, rhs, BinOpKind::Mul.precedence())?;
194
195 // `rhs = expr;` must happen in all cases, even if `changed` is false, otherwise it
196 // would've been moved into `Self::parse_expr` above
197 rhs = expr;
198
199 if !changed {
200 break;
201 }
202 }
203 }
204
205 // create the binary node representing `lhs op rhs`
206 let (start_span, end_span) = (lhs.span().start, rhs.span().end);
207 match op {
208 BinOpExt::Op(op) => Ok(Expr::Binary(Binary {
209 lhs: Box::new(lhs),
210 op,
211 rhs: Box::new(rhs),
212 span: start_span..end_span,
213 })),
214 BinOpExt::ImplicitMultiplication => {
215 let op_span = lhs.span().end..rhs.span().start;
216 Ok(Expr::Binary(Binary {
217 lhs: Box::new(lhs),
218 op: BinOp {
219 kind: BinOpKind::Mul,
220 implicit: true,
221 span: op_span,
222 },
223 rhs: Box::new(rhs),
224 span: start_span..end_span,
225 }))
226 },
227 BinOpExt::Assign(op) => Ok(Expr::Assign(AssignExpr {
228 target: AssignTarget::try_from_with_op(lhs, &op).forward_errors(recoverable_errors)?,
229 op,
230 value: Box::new(rhs),
231 span: start_span..end_span,
232 })),
233 BinOpExt::Range(kind) => Ok(Expr::Range(RangeExpr {
234 start: Box::new(lhs),
235 end: Box::new(rhs),
236 kind,
237 span: start_span..end_span,
238 })),
239 }
240 }
241
242 /// After parsing the left-hand-side of a potential binary expression, parse ahead to see if
243 /// there is a binary operator and a right-hand-side.
244 ///
245 /// See [`Self::complete_rhs`] for more information about the return value of this function.
246 pub(crate) fn parse_expr(
247 input: &mut Parser,
248 recoverable_errors: &mut Vec<Error>,
249 mut lhs: Expr,
250 precedence: Precedence
251 ) -> Result<(Expr, bool), Vec<Error>> {
252 let mut changed = false;
253 loop {
254 let mut input_ahead = input.clone();
255 if let Ok(op) = input_ahead.try_parse_then::<BinOp, _>(|bin_op, input| {
256 if bin_op.precedence() >= precedence {
257 ParseResult::Ok(())
258 } else {
259 ParseResult::Unrecoverable(vec![input.error(NonFatal)])
260 }
261 }).forward_errors(recoverable_errors) {
262 input.set_cursor(&input_ahead);
263 let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
264 lhs = Self::complete_rhs(input, recoverable_errors, lhs, op.into(), rhs)?;
265 } else if let Ok(assign) = input_ahead.try_parse_then::<AssignOp, _>(|_, input| {
266 if Precedence::Assign >= precedence {
267 ParseResult::Ok(())
268 } else {
269 ParseResult::Unrecoverable(vec![input.error(NonFatal)])
270 }
271 }).forward_errors(recoverable_errors) {
272 // assignment is also a binary expression, however it requires special handling
273 // because not all expressions are valid as the left-hand side of an assignment
274 // expression, and there is some syntax is only valid in the context of an
275 // assignment expression (i.e. function headers)
276 input.set_cursor(&input_ahead);
277 let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
278 lhs = Self::complete_rhs(input, recoverable_errors, lhs, assign.into(), rhs)?;
279 } else if input_ahead.try_parse_then::<RangeHalfOpen, _>(|_, input| {
280 if Precedence::Range >= precedence {
281 ParseResult::Ok(())
282 } else {
283 ParseResult::Unrecoverable(vec![input.error(NonFatal)])
284 }
285 }).is_ok() {
286 // range expressions are also binary expressions, but they require special handling
287 // because they are not valid in all contexts
288 input.set_cursor(&input_ahead);
289 let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
290 lhs = Self::complete_rhs(
291 input,
292 recoverable_errors,
293 lhs,
294 BinOpExt::Range(RangeKind::HalfOpen),
295 rhs,
296 )?;
297 } else if input_ahead.try_parse_then::<RangeClosed, _>(|_, input| {
298 if Precedence::Range >= precedence {
299 ParseResult::Ok(())
300 } else {
301 ParseResult::Unrecoverable(vec![input.error(NonFatal)])
302 }
303 }).is_ok() {
304 input.set_cursor(&input_ahead);
305 let rhs = Unary::parse_or_lower(input, recoverable_errors)?;
306 lhs = Self::complete_rhs(
307 input,
308 recoverable_errors,
309 lhs,
310 BinOpExt::Range(RangeKind::Closed),
311 rhs,
312 )?;
313 } else if BinOpKind::Mul.precedence() >= precedence {
314 // implicit multiplication test
315
316 // do not continue if there is significant whitespace after `lhs`
317 input_ahead.advance_past_non_significant_whitespace();
318 if let Some(token) = input_ahead.current_token() {
319 if token.kind.is_significant_whitespace() {
320 break;
321 }
322 }
323
324 // ensure that we get here because there is *no* operator, not because the operator
325 // has lower precedence
326 if input_ahead.try_parse_then::<BinOp, _>(|op, input| {
327 if op.precedence() > BinOpKind::Mul.precedence() {
328 ParseResult::Unrecoverable(vec![input.error(NonFatal)])
329 } else {
330 ParseResult::Ok(())
331 }
332 }).is_ok() {
333 break;
334 }
335
336 // if there is no expression, there is no implicit multiplication and all our
337 // attempts to parse a binary expression fail
338 let mut inner_recoverable_errors = Vec::new();
339 let Ok(rhs) = Unary::parse_or_lower(&mut input_ahead, &mut inner_recoverable_errors) else {
340 break;
341 };
342
343 if rhs.is_implicit_mul_target() {
344 // TODO: this can be refactored with input.try_parse_with_fn once Try trait is
345 // stabilized, which would make ParseResult so much nicer to work with
346
347 // add the recoverable errors from the inner parser to the outer parser, since
348 // we know now implicit multiplication is the correct branch to take
349 recoverable_errors.extend(inner_recoverable_errors);
350 input.set_cursor(&input_ahead);
351 lhs = Self::complete_rhs(
352 input,
353 recoverable_errors,
354 lhs,
355 BinOpExt::ImplicitMultiplication,
356 rhs,
357 )?;
358 } else {
359 break;
360 }
361 } else {
362 break;
363 }
364
365 changed = true;
366 }
367
368 Ok((lhs, changed))
369 }
370}
371
372impl<'source> Parse<'source> for Binary {
373 fn std_parse(
374 input: &mut Parser<'source>,
375 recoverable_errors: &mut Vec<Error>
376 ) -> Result<Self, Vec<Error>> {
377 match input.try_parse().forward_errors(recoverable_errors)? {
378 Expr::Binary(binary) => Ok(binary),
379 _ => todo!(),
380 }
381 }
382}
383
384impl std::fmt::Display for Binary {
385 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
386 self.lhs.fmt(f)?;
387 self.op.fmt(f)?;
388 self.rhs.fmt(f)
389 }
390}
391
392impl Latex for Binary {
393 fn fmt_latex(&self, f: &mut fmt::Formatter) -> fmt::Result {
394 match self.op.kind {
395 BinOpKind::Exp => fmt_pow(f, Some(&*self.lhs), Some(&*self.rhs)),
396 BinOpKind::Div => {
397 write!(f, "\\frac{{")?;
398 self.lhs.fmt_latex(f)?;
399 write!(f, "}}{{")?;
400 self.rhs.innermost().fmt_latex(f)?;
401 write!(f, "}}")
402 },
403 _ => {
404 self.lhs.fmt_latex(f)?;
405 self.op.fmt_latex(f)?;
406 self.rhs.fmt_latex(f)
407 },
408 }
409 }
410}