vegafusion_core/expression/
parser.rs

1use crate::error::{Result, ResultWithContext, VegaFusionError};
2use crate::expression::lexer::{tokenize, Token};
3use crate::expression::ops::{binary_op_from_token, logical_op_from_token, unary_op_from_token};
4use crate::proto::gen::expression::expression::Expr;
5
6use crate::proto::gen::expression::{
7    ArrayExpression, BinaryExpression, BinaryOperator, CallExpression, ConditionalExpression,
8    Expression, Identifier, Literal, LogicalExpression, LogicalOperator, MemberExpression,
9    ObjectExpression, Property, Span, UnaryExpression, UnaryOperator,
10};
11
12pub fn parse(expr: &str) -> Result<Expression> {
13    let mut tokens = tokenize(expr)?;
14    let result = perform_parse(&mut tokens, 0.0, expr)?;
15    if !tokens.is_empty() {
16        let (token, start, _) = &tokens[0];
17        return Err(VegaFusionError::parse(format!(
18            "Unexpected token {token} at position {start} in expression: {expr}"
19        )));
20    }
21
22    Ok(result)
23}
24
/// Parse a single expression from the front of `tokens`, consuming the tokens it uses.
///
/// This is the core of a binding-power driven (Pratt-style) parser:
///
/// 1. The leading token is popped and turned into an initial left-hand side: an atom,
///    a unary expression, a parenthesized expression, an array literal, or an object
///    literal.
/// 2. Following tokens are then examined one at a time. Each infix/postfix form
///    (binary, logical, call, computed member, static member, ternary) is delegated to
///    a helper that returns `None` when the form binds less tightly than `min_bp`. In
///    that case the loop stops and the token is left in place for an enclosing call.
///    Otherwise the helper's result replaces the current left-hand side.
///
/// `tokens` holds `(token, start, end)` triples. `full_expr` is the original expression
/// text and is only used to build error messages.
///
/// # Errors
///
/// Returns an error when `tokens` is empty, when the leading token cannot start an
/// expression, when an unexpected token follows the left-hand side, or when any nested
/// parse fails.
fn perform_parse(
    tokens: &mut Vec<(Token, usize, usize)>,
    min_bp: f64,
    full_expr: &str,
) -> Result<Expression> {
    if tokens.is_empty() {
        return Err(VegaFusionError::parse("Unexpected end of expression"));
    }

    // Pop leading token
    let (lhs_token, start, end) = tokens[0].clone();
    tokens.remove(0);

    // parse form that starts with lhs_token
    let lhs_result = if is_atom(&lhs_token) {
        parse_atom(&lhs_token, start, end)
    } else if let Ok(op) = unary_op_from_token(&lhs_token) {
        // Unary expression
        parse_unary(tokens, op, start, full_expr)
    } else if lhs_token == Token::OpenParen {
        // Arbitrary expression inside parens
        parse_paren_grouping(tokens, full_expr)
    } else if lhs_token == Token::OpenSquare {
        // Array literal expression
        parse_array(tokens, start, full_expr)
    } else if lhs_token == Token::OpenCurly {
        // Object literal expression
        parse_object(tokens, start, full_expr)
    } else {
        Err(VegaFusionError::parse(format!(
            "Unexpected token: {lhs_token}"
        )))
    };

    // Attach the position of the leading token to any failure of the initial form
    let mut lhs = lhs_result.with_context(|| {
        format!("Failed to parse form starting at position {start} in expression: {full_expr}")
    })?;

    // pop tokens and add to lhs expression
    while !tokens.is_empty() {
        // Peek only: the helpers below consume the token themselves once they commit.
        // This `start` shadows the outer one and is the position of the peeked
        // (operator) token; it is what the helpers receive as their `start` argument.
        let (token, start, _) = &tokens[0];
        let start = *start;

        // Check for tokens that always close expressions. If found, break out of while loop
        match token {
            Token::CloseParen
            | Token::CloseCurly
            | Token::CloseSquare
            | Token::Comma
            | Token::Colon => break,
            _ => {}
        }

        // In every branch below, a `None` from the helper means the form binds less
        // tightly than `min_bp`, so this call is finished and the token stays in place.
        let expr_result: Result<Expression> = if let Ok(op) = binary_op_from_token(token) {
            if let Some(new_lhs_result) = parse_binary(tokens, op, &lhs, min_bp, start, full_expr) {
                new_lhs_result
            } else {
                break;
            }
        } else if let Ok(op) = logical_op_from_token(token) {
            if let Some(new_lhs_result) = parse_logical(tokens, op, &lhs, min_bp, start, full_expr)
            {
                new_lhs_result
            } else {
                break;
            }
        } else if token == &Token::OpenParen {
            // Function call (e.g. foo(bar))
            if let Some(new_lhs_result) = parse_call(tokens, &lhs, min_bp, start, full_expr) {
                new_lhs_result
            } else {
                break;
            }
        } else if token == &Token::OpenSquare {
            // computed object/array membership (e.g. foo['bar'])
            if let Some(new_lhs_result) =
                parse_computed_member(tokens, &lhs, min_bp, start, full_expr)
            {
                new_lhs_result
            } else {
                break;
            }
        } else if token == &Token::Dot {
            // static property membership (e.g. foo.bar)
            if let Some(new_lhs_result) =
                parse_static_member(tokens, &lhs, min_bp, start, full_expr)
            {
                new_lhs_result
            } else {
                break;
            }
        } else if token == &Token::Question {
            // ternary operator (e.g. foo ? bar: baz)
            if let Some(new_lhs_result) = parse_ternary(tokens, &lhs, min_bp, start, full_expr) {
                new_lhs_result
            } else {
                break;
            }
        } else {
            Err(VegaFusionError::parse(format!(
                "Unexpected token '{token}'"
            )))
        };

        // The combined expression becomes the new left-hand side for the next iteration
        lhs = expr_result.with_context(|| {
            format!("Failed to parse form starting at position {start} in expression: {full_expr}")
        })?;
    }

    Ok(lhs)
}
136
137pub fn expect_token(
138    tokens: &mut Vec<(Token, usize, usize)>,
139    expected: Token,
140) -> Result<(Token, usize, usize)> {
141    if tokens.is_empty() {
142        return Err(VegaFusionError::parse(format!(
143            "Expected {expected}, reached end of expression"
144        )));
145    }
146    let (token, start, end) = tokens[0].clone();
147    if token != expected {
148        return Err(VegaFusionError::parse(format!(
149            "Expected {expected}, received {token}"
150        )));
151    }
152    tokens.remove(0);
153    Ok((token, start, end))
154}
155
156/// Check whether token is an atomic Expression
157pub fn is_atom(token: &Token) -> bool {
158    matches!(
159        token,
160        Token::Null
161            | Token::Number { .. }
162            | Token::Identifier { .. }
163            | Token::String { .. }
164            | Token::Bool { .. }
165    )
166}
167
168/// Parse atom token to Expression
169pub fn parse_atom(token: &Token, start: usize, end: usize) -> Result<Expression> {
170    let span = Span {
171        start: start as i32,
172        end: end as i32,
173    };
174
175    let expr = match token {
176        Token::Null => Expr::from(Literal::null()),
177        Token::Bool { value, raw } => Expr::from(Literal::new(*value, raw)),
178        Token::Number { value, raw } => Expr::from(Literal::new(*value, raw)),
179        Token::String { value, raw } => Expr::from(Literal::new(value.clone(), raw)),
180        Token::Identifier { value } => Expr::from(Identifier::new(value)),
181        _ => {
182            return Err(VegaFusionError::parse(format!(
183                "Token not an atom: {token}"
184            )))
185        }
186    };
187
188    Ok(Expression {
189        expr: Some(expr),
190        span: Some(span),
191    })
192}
193
194pub fn parse_unary(
195    tokens: &mut Vec<(Token, usize, usize)>,
196    op: UnaryOperator,
197    start: usize,
198    full_expr: &str,
199) -> Result<Expression> {
200    let unary_bp = op.unary_binding_power();
201    let rhs = perform_parse(tokens, unary_bp, full_expr)?;
202    let new_span = Span {
203        start: start as i32,
204        end: rhs.span.unwrap().end,
205    };
206    let expr = Expr::from(UnaryExpression::new(&op, rhs));
207    Ok(Expression::new(expr, Some(new_span)))
208}
209
210pub fn parse_binary(
211    tokens: &mut Vec<(Token, usize, usize)>,
212    op: BinaryOperator,
213    lhs: &Expression,
214    min_bp: f64,
215    start: usize,
216    full_expr: &str,
217) -> Option<Result<Expression>> {
218    // Infix operator
219    let (left_bp, right_bp) = op.infix_binding_power();
220    if left_bp < min_bp {
221        return None;
222    }
223
224    // Commit to processing operator token
225    tokens.remove(0);
226
227    Some(match perform_parse(tokens, right_bp, full_expr) {
228        Ok(rhs) => {
229            // Update lhs
230            let new_span = Span {
231                start: start as i32,
232                end: rhs.span.unwrap().end,
233            };
234            let expr = Expr::from(BinaryExpression::new(lhs.clone(), &op, rhs));
235            Ok(Expression::new(expr, Some(new_span)))
236        }
237        Err(err) => Err(err),
238    })
239}
240
241pub fn parse_logical(
242    tokens: &mut Vec<(Token, usize, usize)>,
243    op: LogicalOperator,
244    lhs: &Expression,
245    min_bp: f64,
246    start: usize,
247    full_expr: &str,
248) -> Option<Result<Expression>> {
249    // Infix operator
250    let (left_bp, right_bp) = op.infix_binding_power();
251    if left_bp < min_bp {
252        return None;
253    }
254    // Commit to processing operator token
255    tokens.remove(0);
256
257    Some(match perform_parse(tokens, right_bp, full_expr) {
258        Ok(rhs) => {
259            // Update lhs
260            let new_span = Span {
261                start: start as i32,
262                end: rhs.span.unwrap().end,
263            };
264            let expr = Expr::from(LogicalExpression::new(lhs.clone(), &op, rhs));
265            Ok(Expression::new(expr, Some(new_span)))
266        }
267        Err(err) => Err(err),
268    })
269}
270
271pub fn parse_call(
272    tokens: &mut Vec<(Token, usize, usize)>,
273    lhs: &Expression,
274    min_bp: f64,
275    start: usize,
276    full_expr: &str,
277) -> Option<Result<Expression>> {
278    let lhs = match lhs
279        .as_identifier()
280        .with_context(|| "Only global functions are callable")
281    {
282        Ok(identifier) => identifier,
283        Err(err) => return Some(Err(err)),
284    };
285
286    // For precedence, see
287    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
288    let computed_member_bp = 20.0;
289    if min_bp >= computed_member_bp {
290        return None;
291    }
292
293    // Opening paren
294    expect_token(tokens, Token::OpenParen).unwrap();
295
296    // Parse arguments
297    let mut arguments: Vec<Expression> = Vec::new();
298    while !tokens.is_empty() && tokens[0].0 != Token::CloseParen {
299        let parsed_arg = perform_parse(tokens, 1.0, full_expr);
300        match parsed_arg {
301            Ok(parsed_arg) => {
302                arguments.push(parsed_arg);
303
304                // Remove comma token, if any
305                expect_token(tokens, Token::Comma).ok();
306            }
307            Err(err) => return Some(Err(err)),
308        }
309    }
310
311    // Closing paren
312    let (_, _, end) = expect_token(tokens, Token::CloseParen).unwrap();
313
314    // Update span
315    let new_span = Span {
316        start: start as i32,
317        end: end as i32,
318    };
319    let expr = Expr::from(CallExpression::new(&lhs.name, arguments));
320    Some(Ok(Expression::new(expr, Some(new_span))))
321}
322
323pub fn parse_computed_member(
324    tokens: &mut Vec<(Token, usize, usize)>,
325    lhs: &Expression,
326    min_bp: f64,
327    start: usize,
328    full_expr: &str,
329) -> Option<Result<Expression>> {
330    let computed_member_bp = 20.0;
331    if min_bp >= computed_member_bp {
332        return None;
333    }
334
335    // Opening bracket
336    expect_token(tokens, Token::OpenSquare).unwrap();
337
338    // Property expression
339    Some(match perform_parse(tokens, 1.0, full_expr) {
340        Ok(property) => {
341            // Closing bracket
342            let (_, _, end) = expect_token(tokens, Token::CloseSquare).unwrap();
343
344            // Update span
345            let new_span = Span {
346                start: start as i32,
347                end: end as i32,
348            };
349
350            let expr = Expr::from(MemberExpression::new_computed(lhs.clone(), property));
351            Ok(Expression::new(expr, Some(new_span)))
352        }
353        Err(err) => Err(err),
354    })
355}
356
357pub fn parse_static_member(
358    tokens: &mut Vec<(Token, usize, usize)>,
359    lhs: &Expression,
360    min_bp: f64,
361    start: usize,
362    full_expr: &str,
363) -> Option<Result<Expression>> {
364    let computed_member_bp = 20.0;
365    if min_bp >= computed_member_bp {
366        return None;
367    }
368
369    // Dot
370    expect_token(tokens, Token::Dot).unwrap();
371
372    // Property expression
373    Some(match perform_parse(tokens, 1000.0, full_expr) {
374        Ok(property) => {
375            // Update span
376            let new_span = Span {
377                start: start as i32,
378                end: property.span.unwrap().end,
379            };
380
381            let expr = match MemberExpression::new_static(lhs.clone(), property) {
382                Ok(member) => Expr::from(member),
383                Err(err) => return Some(Err(err)),
384            };
385            Ok(Expression::new(expr, Some(new_span)))
386        }
387        Err(err) => Err(err),
388    })
389}
390
391pub fn parse_ternary(
392    tokens: &mut Vec<(Token, usize, usize)>,
393    lhs: &Expression,
394    min_bp: f64,
395    start: usize,
396    full_expr: &str,
397) -> Option<Result<Expression>> {
398    let (left_bp, middle_bp, right_bp) = ConditionalExpression::ternary_binding_power();
399    if min_bp >= left_bp {
400        return None;
401    }
402
403    // Question mark
404    expect_token(tokens, Token::Question).unwrap();
405
406    // Parse consequent
407    let consequent = if let Ok(consequent) = perform_parse(tokens, middle_bp, full_expr) {
408        consequent
409    } else {
410        return Some(Err(VegaFusionError::parse(
411            "Failed to parse consequent of ternary operator",
412        )));
413    };
414
415    // Colon
416    expect_token(tokens, Token::Colon).unwrap();
417
418    // Parse alternate
419    let alternate = if let Ok(alternate) = perform_parse(tokens, right_bp, full_expr) {
420        alternate
421    } else {
422        return Some(Err(VegaFusionError::parse(
423            "Failed to parse alternate of ternary operator",
424        )));
425    };
426
427    // Update span
428    let new_span = Span {
429        start: start as i32,
430        end: alternate.span.unwrap().end,
431    };
432
433    let expr = Expr::from(ConditionalExpression::new(
434        lhs.clone(),
435        consequent,
436        alternate,
437    ));
438    Some(Ok(Expression::new(expr, Some(new_span))))
439}
440
441pub fn parse_paren_grouping(
442    tokens: &mut Vec<(Token, usize, usize)>,
443    full_expr: &str,
444) -> Result<Expression> {
445    perform_parse(tokens, 0.0, full_expr).and_then(|new_lhs| {
446        expect_token(tokens, Token::CloseParen)?;
447        Ok(new_lhs)
448    })
449}
450
451pub fn parse_array(
452    tokens: &mut Vec<(Token, usize, usize)>,
453    start: usize,
454    full_expr: &str,
455) -> Result<Expression> {
456    let mut elements: Vec<Expression> = Vec::new();
457
458    while !tokens.is_empty() && tokens[0].0 != Token::CloseSquare {
459        elements.push(perform_parse(tokens, 1.0, full_expr)?);
460
461        // Remove single comma token, if any
462        expect_token(tokens, Token::Comma).ok();
463    }
464
465    // Closing bracket
466    let (_, _, end) = expect_token(tokens, Token::CloseSquare).unwrap();
467
468    // Update span
469    let new_span = Span {
470        start: start as i32,
471        end: end as i32,
472    };
473
474    let expr = Expr::from(ArrayExpression::new(elements));
475    Ok(Expression::new(expr, Some(new_span)))
476}
477
478pub fn parse_object(
479    tokens: &mut Vec<(Token, usize, usize)>,
480    start: usize,
481    full_expr: &str,
482) -> Result<Expression> {
483    let mut properties: Vec<Property> = Vec::new();
484
485    while !tokens.is_empty() && tokens[0].0 != Token::CloseCurly {
486        let key = match perform_parse(tokens, 1.0, full_expr) {
487            Ok(key) => key,
488            Err(err) => return Err(err.with_context(|| "Failed to parse object key".to_string())),
489        };
490
491        expect_token(tokens, Token::Colon)?;
492
493        let value = match perform_parse(tokens, 1.0, full_expr) {
494            Ok(key) => key,
495            Err(err) => {
496                return Err(err.with_context(|| "Failed to parse object property value".to_string()))
497            }
498        };
499
500        // Remove comma token, if any
501        expect_token(tokens, Token::Comma).ok();
502
503        let property = Property::try_new(key, value)?;
504        properties.push(property);
505    }
506
507    // Closing bracket
508    let (_, _, end) = expect_token(tokens, Token::CloseCurly).unwrap();
509
510    // Update span
511    let new_span = Span {
512        start: start as i32,
513        end: end as i32,
514    };
515
516    let expr = Expr::from(ObjectExpression::new(properties));
517    Ok(Expression::new(expr, Some(new_span)))
518}
519
#[cfg(test)]
mod test_parse {
    use crate::expression::parser::parse;

    /// Parse `input` and assert that the parsed expression displays as `expected`.
    fn assert_parses_to(input: &str, expected: &str) {
        let node = parse(input).unwrap();
        assert_eq!(node.to_string(), expected);
    }

    #[test]
    fn test_parse_atom() {
        assert_parses_to("23.500000", "23.5");
        assert_parses_to("\"hello\"", "\"hello\"");
    }

    #[test]
    fn test_parse_binary() {
        assert_parses_to("23.50 + foo * 87", "23.5 + foo * 87");
    }

    #[test]
    fn test_parse_logical() {
        assert_parses_to("false || (foo && bar)", "false || foo && bar");
    }

    #[test]
    fn test_parse_prefix() {
        assert_parses_to("-23.50 + +foo", "-23.5 + +foo");
    }

    #[test]
    fn test_paren_grouping() {
        assert_parses_to("-(23.50 + foo)", "-(23.5 + foo)");
    }

    #[test]
    fn test_call() {
        // One arg
        assert_parses_to("foo(19.0)", "foo(19)");

        // Zero args
        assert_parses_to("foo()", "foo()");

        // Two args
        assert_parses_to("foo('a', 21)", "foo(\"a\", 21)");

        // Two args, trailing comma
        assert_parses_to("foo('a', 21,)", "foo(\"a\", 21)");
    }

    #[test]
    fn test_computed_membership() {
        assert_parses_to("foo[19.0]", "foo[19]");
        assert_parses_to("foo['bar']", "foo[\"bar\"]");
    }

    #[test]
    fn test_static_membership() {
        assert_parses_to("foo.bar", "foo.bar");
        assert_parses_to("foo.bar[2]", "foo.bar[2]");
    }

    #[test]
    fn test_ternary() {
        assert_parses_to("foo ? 2 + 3: 27", "foo ? 2 + 3: 27");
        assert_parses_to("foo ? 2 + 3: 27 || 17", "foo ? 2 + 3: 27 || 17");
        assert_parses_to("foo ? 2 + 3: (27 || 17)", "foo ? 2 + 3: 27 || 17");
        assert_parses_to("(foo ? 2 + 3: 27) || 17", "(foo ? 2 + 3: 27) || 17");

        // Check right associativity
        assert_parses_to(
            "c1 ? v1: c2 ? v2: c3 ? v3: v4",
            "c1 ? v1: c2 ? v2: c3 ? v3: v4",
        );
        assert_parses_to(
            "c1 ? v1: (c2 ? v2: (c3 ? v3: v4))",
            "c1 ? v1: c2 ? v2: c3 ? v3: v4",
        );
        assert_parses_to(
            "((c1 ? v1: c2) ? v2: c3)? v3: v4",
            "((c1 ? v1: c2) ? v2: c3) ? v3: v4",
        );
    }

    #[test]
    fn test_array() {
        assert_parses_to("[19.0]", "[19]");
        assert_parses_to("['bar', 23]", "[\"bar\", 23]");
        assert_parses_to("[]", "[]");
    }

    #[test]
    fn test_object() {
        assert_parses_to("{a: 2, 'b': 2 + 2}", r#"{a: 2, "b": 2 + 2}"#);
    }
}