vegafusion_core/expression/
visitors.rs

1use crate::proto::gen::expression::property::Key;
2use crate::proto::gen::expression::{
3    ArrayExpression, BinaryExpression, CallExpression, ConditionalExpression, Expression,
4    Identifier, Literal, LogicalExpression, MemberExpression, ObjectExpression, UnaryExpression,
5};
6
7use crate::expression::column_usage::{ColumnUsage, DatasetsColumnUsage, VlSelectionFields};
8use crate::expression::supported::{
9    ALL_DATA_FNS, ALL_EXPRESSION_CONSTANTS, ALL_SCALE_FNS, IMPLICIT_VARS, SUPPORTED_DATA_FNS,
10    SUPPORTED_EXPRESSION_FNS, SUPPORTED_SCALE_FNS,
11};
12use crate::proto::gen::expression::expression::Expr;
13use crate::proto::gen::expression::literal::Value;
14use crate::proto::gen::tasks::Variable;
15use crate::task_graph::graph::ScopedVariable;
16use crate::task_graph::scope::TaskScope;
17use crate::task_graph::task::InputVariable;
18use std::collections::HashSet;
19
20pub trait ExpressionVisitor {
21    fn visit_expression(&mut self, _expression: &Expression) {}
22    fn visit_identifier(&mut self, _node: &Identifier) {}
23    fn visit_called_identifier(&mut self, _node: &Identifier, _args: &[Expression]) {}
24    fn visit_literal(&mut self, _node: &Literal) {}
25    fn visit_binary(&mut self, _node: &BinaryExpression) {}
26    fn visit_logical(&mut self, _node: &LogicalExpression) {}
27    fn visit_unary(&mut self, _node: &UnaryExpression) {}
28    fn visit_conditional(&mut self, _node: &ConditionalExpression) {}
29    fn visit_member(&mut self, _node: &MemberExpression) {}
30    fn visit_call(&mut self, _node: &CallExpression) {}
31    fn visit_array(&mut self, _node: &ArrayExpression) {}
32    fn visit_object(&mut self, _node: &ObjectExpression) {}
33    fn visit_object_key(&mut self, _node: &Key) {}
34    fn visit_static_member_identifier(&mut self, _node: &Identifier) {}
35}
36
37pub trait MutExpressionVisitor {
38    fn visit_expression(&mut self, _expression: &mut Expression) {}
39    fn visit_identifier(&mut self, _node: &mut Identifier) {}
40    fn visit_called_identifier(&mut self, _node: &mut Identifier, _args: &mut [Expression]) {}
41    fn visit_literal(&mut self, _node: &mut Literal) {}
42    fn visit_binary(&mut self, _node: &mut BinaryExpression) {}
43    fn visit_logical(&mut self, _node: &mut LogicalExpression) {}
44    fn visit_unary(&mut self, _node: &mut UnaryExpression) {}
45    fn visit_conditional(&mut self, _node: &mut ConditionalExpression) {}
46    fn visit_member(&mut self, _node: &mut MemberExpression) {}
47    fn visit_call(&mut self, _node: &mut CallExpression) {}
48    fn visit_array(&mut self, _node: &mut ArrayExpression) {}
49    fn visit_object(&mut self, _node: &mut ObjectExpression) {}
50    fn visit_object_key(&mut self, _node: &mut Key) {}
51    fn visit_static_member_identifier(&mut self, _node: &mut Identifier) {}
52}
53
/// Visitor that resets every span in an expression tree to `None`.
///
/// The visitor itself carries no state.
#[derive(Clone, Default)]
pub struct ClearSpansVisitor {}

impl ClearSpansVisitor {
    /// Build a new visitor; equivalent to `ClearSpansVisitor::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
62
63impl MutExpressionVisitor for ClearSpansVisitor {
64    fn visit_expression(&mut self, expression: &mut Expression) {
65        expression.span.take();
66    }
67    fn visit_member(&mut self, node: &mut MemberExpression) {
68        node.property.as_mut().unwrap().span.take();
69    }
70}
71
72/// Visitor to collect all unbound input variables in the expression
73#[derive(Clone, Default)]
74pub struct GetInputVariablesVisitor {
75    pub input_variables: HashSet<InputVariable>,
76    pub expression_fns: HashSet<String>,
77    pub data_fns: HashSet<String>,
78    pub scale_fns: HashSet<String>,
79}
80
81impl GetInputVariablesVisitor {
82    pub fn new() -> Self {
83        Self {
84            input_variables: Default::default(),
85            expression_fns: Default::default(),
86            data_fns: Default::default(),
87            scale_fns: Default::default(),
88        }
89    }
90}
91
92impl ExpressionVisitor for GetInputVariablesVisitor {
93    fn visit_identifier(&mut self, node: &Identifier) {
94        // implicit vars like datum and event do not count as a variables
95        if !IMPLICIT_VARS.contains(node.name.as_str())
96            && !ALL_EXPRESSION_CONSTANTS.contains(node.name.as_str())
97        {
98            self.input_variables.insert(InputVariable {
99                var: Variable::new_signal(&node.name),
100                propagate: true,
101            });
102        }
103    }
104
105    /// Collect data and scale identifiers. These show up as a literal string as the first
106    /// argument to a Data or Scale callable.
107    fn visit_called_identifier(&mut self, node: &Identifier, args: &[Expression]) {
108        if let Some(arg0) = args.first() {
109            if let Ok(arg0) = arg0.as_literal() {
110                if let Value::String(arg0) = arg0.value() {
111                    // Check data callable
112                    if ALL_DATA_FNS.contains(node.name.as_str()) {
113                        // Propagate on changes to data unless this is a modify function
114                        let propagate = node.name != "modify";
115                        self.input_variables.insert(InputVariable {
116                            var: Variable::new_data(arg0),
117                            propagate,
118                        });
119                    }
120
121                    // Check scale callable
122                    if ALL_SCALE_FNS.contains(node.name.as_str()) {
123                        self.input_variables.insert(InputVariable {
124                            var: Variable::new_scale(arg0),
125                            propagate: true,
126                        });
127                    }
128                }
129            }
130        }
131
132        // Record function type
133        if ALL_DATA_FNS.contains(node.name.as_str()) {
134            self.data_fns.insert(node.name.clone());
135        } else if ALL_SCALE_FNS.contains(node.name.as_str()) {
136            self.scale_fns.insert(node.name.clone());
137        } else {
138            self.expression_fns.insert(node.name.clone());
139        }
140    }
141}
142
143/// Visitor to collect all output variables in the expression
144#[derive(Clone, Default)]
145pub struct UpdateVariablesExprVisitor {
146    pub update_variables: HashSet<Variable>,
147}
148
149impl UpdateVariablesExprVisitor {
150    pub fn new() -> Self {
151        Self {
152            update_variables: Default::default(),
153        }
154    }
155}
156
157impl ExpressionVisitor for UpdateVariablesExprVisitor {
158    fn visit_called_identifier(&mut self, node: &Identifier, args: &[Expression]) {
159        if node.name == "modify" {
160            if let Some(arg0) = args.first() {
161                if let Ok(arg0) = arg0.as_literal() {
162                    if let Value::String(arg0) = arg0.value() {
163                        // First arg is a string, which holds the name of the output dataset
164                        self.update_variables.insert(Variable::new_data(arg0));
165                    }
166                }
167            }
168        }
169    }
170}
171
/// Visitor to check whether an expression is supported by the VegaFusion Runtime
///
/// The visitor starts out optimistic (`supported == true`); its visit hooks
/// only ever flip the flag to `false`.
#[derive(Clone)]
pub struct CheckSupportedExprVisitor {
    /// `true` until an unsupported construct has been visited.
    pub supported: bool,
}

impl CheckSupportedExprVisitor {
    /// Create a visitor that assumes the expression is supported.
    pub fn new() -> Self {
        Self { supported: true }
    }
}

impl Default for CheckSupportedExprVisitor {
    /// Same starting state as [`CheckSupportedExprVisitor::new`].
    ///
    /// A derived `Default` would start with `supported == false`, and because
    /// the flag is never set back to `true`, such a visitor would report every
    /// expression as unsupported.
    fn default() -> Self {
        Self::new()
    }
}
183
184impl ExpressionVisitor for CheckSupportedExprVisitor {
185    fn visit_called_identifier(&mut self, node: &Identifier, args: &[Expression]) {
186        // Check for unsupported functions
187        if ALL_DATA_FNS.contains(node.name.as_str()) {
188            if !SUPPORTED_DATA_FNS.contains(node.name.as_str()) {
189                self.supported = false;
190            }
191            if node.name == "vlSelectionResolve" && args.len() > 2 {
192                // The third (multi) and forth (vl5) arguments are not supported
193                self.supported = false;
194            }
195        } else if ALL_SCALE_FNS.contains(node.name.as_str()) {
196            if !SUPPORTED_SCALE_FNS.contains(node.name.as_str()) {
197                self.supported = false;
198            }
199        } else if !SUPPORTED_EXPRESSION_FNS.contains(node.name.as_str()) {
200            self.supported = false;
201        } else if node.name == "indexof" {
202            // We only support the array variant of indexof (not the string variant)
203            if !(args.len() == 2 && matches!(args[0].expr, Some(Expr::Array(_)))) {
204                self.supported = false;
205            }
206        } else if node.name == "format" {
207            // We only support format with an empty string as second argument
208            if args.len() != 2 {
209                self.supported = false;
210            } else if let Some(Expr::Literal(Literal {
211                value: Some(Value::String(v)),
212                ..
213            })) = &args[1].expr
214            {
215                if !v.is_empty() {
216                    self.supported = false;
217                }
218            } else {
219                self.supported = false;
220            }
221        }
222    }
223
224    fn visit_member(&mut self, node: &MemberExpression) {
225        // Check for unsupported use of member property.
226        // Property cannot use implicit datum variable
227        if node.computed {
228            let property = node.property.as_ref().unwrap();
229            if property.implicit_vars().contains(&"datum".to_string()) {
230                // e.g. ([0, 1])[datum.foo]
231                self.supported = false;
232            }
233        }
234
235        if let Some(object) = &node.object {
236            if object.implicit_vars().contains(&"datum".to_string()) {
237                let object_expr = object.expr.as_ref().unwrap();
238                let property = node.property.as_ref().unwrap();
239                let property_expr = property.expr.as_ref().unwrap();
240
241                // Object of member may only contain datum if it is the literal datum identifier.
242                // datum["foo"] is ok, (datum["foo"])["bar"] is not
243                let is_datum_literal = object_expr
244                    == &Expr::Identifier(Identifier {
245                        name: "datum".to_string(),
246                    });
247
248                // ... unless the property is a number. datum["foo"][0] is ok
249                let is_number_index = matches!(
250                    property_expr,
251                    Expr::Literal(Literal {
252                        value: Some(Value::Number(_)),
253                        ..
254                    })
255                );
256
257                if !(is_datum_literal || is_number_index) {
258                    self.supported = false;
259                }
260            }
261        }
262    }
263}
264
/// Visitor that gathers the names of all implicit variables referenced by an
/// expression.
#[derive(Clone, Default)]
pub struct ImplicitVariablesExprVisitor {
    /// Names of the implicit variables seen so far.
    pub implicit_vars: HashSet<String>,
}

impl ImplicitVariablesExprVisitor {
    /// Create a visitor with an empty set of implicit variables.
    pub fn new() -> Self {
        Self::default()
    }
}
278
279impl ExpressionVisitor for ImplicitVariablesExprVisitor {
280    fn visit_identifier(&mut self, node: &Identifier) {
281        // implicit vars like datum and event do not count as a variables
282        if IMPLICIT_VARS.contains(node.name.as_str()) {
283            self.implicit_vars.insert(node.name.clone());
284        }
285    }
286}
287
288/// Visitor to collect the columns
289#[derive(Clone)]
290pub struct DatasetsColumnUsageVisitor<'a> {
291    pub vl_selection_fields: &'a VlSelectionFields,
292    pub datum_var: &'a Option<ScopedVariable>,
293    pub usage_scope: &'a [u32],
294    pub task_scope: &'a TaskScope,
295    pub dataset_column_usage: DatasetsColumnUsage,
296}
297
298impl<'a> DatasetsColumnUsageVisitor<'a> {
299    pub fn new(
300        datum_var: &'a Option<ScopedVariable>,
301        usage_scope: &'a [u32],
302        task_scope: &'a TaskScope,
303        vl_selection_fields: &'a VlSelectionFields,
304    ) -> Self {
305        Self {
306            vl_selection_fields,
307            datum_var,
308            usage_scope,
309            task_scope,
310            dataset_column_usage: DatasetsColumnUsage::empty(),
311        }
312    }
313}
314
315impl ExpressionVisitor for DatasetsColumnUsageVisitor<'_> {
316    fn visit_member(&mut self, node: &MemberExpression) {
317        if let (Some(datum_var), Some(object), Some(property)) =
318            (&self.datum_var, &node.object, &node.property)
319        {
320            if let (Some(Expr::Identifier(object_id)), Some(property_expr)) =
321                (&object.expr, &property.expr)
322            {
323                if object_id.name == "datum" {
324                    // This expression is a member expression on the datum free variable
325                    if node.computed {
326                        match property_expr {
327                            Expr::Literal(Literal {
328                                value: Some(Value::String(name)),
329                                ..
330                            }) => {
331                                // Found `datum['col_name']` usage
332                                self.dataset_column_usage = self
333                                    .dataset_column_usage
334                                    .with_column_usage(datum_var, ColumnUsage::from(name.as_str()));
335                            }
336                            _ => {
337                                // Unknown usage (e.g. `datum['col_' + 'name']`)
338                                self.dataset_column_usage =
339                                    self.dataset_column_usage.with_unknown_usage(datum_var);
340                            }
341                        }
342                    } else {
343                        match property_expr {
344                            Expr::Identifier(id) => {
345                                // Found `datum.col_name` usage
346                                self.dataset_column_usage =
347                                    self.dataset_column_usage.with_column_usage(
348                                        datum_var,
349                                        ColumnUsage::from(id.name.as_str()),
350                                    );
351                            }
352                            _ => {
353                                // Unknown datum usage
354                                self.dataset_column_usage =
355                                    self.dataset_column_usage.with_unknown_usage(datum_var);
356                            }
357                        }
358                    }
359                }
360            }
361        }
362    }
363
364    fn visit_call(&mut self, node: &CallExpression) {
365        // Handle data functions
366        if ALL_DATA_FNS.contains(node.callee.as_str()) {
367            // First argument should be a string
368            if let Some(Expression {
369                expr:
370                    Some(Expr::Literal(Literal {
371                        value: Some(Value::String(reference_data_name)),
372                        ..
373                    })),
374                ..
375            }) = node.arguments.first()
376            {
377                // Resolve data variable
378                let reference_data_var = Variable::new_data(reference_data_name);
379                if let Ok(resolved) = self
380                    .task_scope
381                    .resolve_scope(&reference_data_var, self.usage_scope)
382                {
383                    let scoped_reference_data_var: ScopedVariable = (resolved.var, resolved.scope);
384                    // e.g. data('other_dataset')
385                    // We don't know which columns in the referenced dataset are used
386                    self.dataset_column_usage = self
387                        .dataset_column_usage
388                        .with_unknown_usage(&scoped_reference_data_var);
389
390                    // Handle vlSelectionTest, which also uses datum columns
391                    if let Some(datum_var) = self.datum_var {
392                        match node.callee.as_str() {
393                            "vlSelectionTest" => {
394                                if let Some(fields) =
395                                    self.vl_selection_fields.get(&scoped_reference_data_var)
396                                {
397                                    // Add selection fields to usage for datum
398                                    self.dataset_column_usage = self
399                                        .dataset_column_usage
400                                        .with_column_usage(datum_var, fields.clone());
401                                } else {
402                                    // Unknown fields dataset, so we don't know which datum columns
403                                    // are needed at runtime
404                                    self.dataset_column_usage =
405                                        self.dataset_column_usage.with_unknown_usage(datum_var);
406                                }
407                            }
408                            "vlSelectionIdTest" => {
409                                // Add _vgsid_ fields usage for datum
410                                self.dataset_column_usage = self
411                                    .dataset_column_usage
412                                    .with_column_usage(datum_var, ColumnUsage::from("_vgsid_"));
413                            }
414                            _ => {}
415                        }
416                    }
417                } else {
418                    // Unknown brushing dataset, so we don't know which datum columns
419                    // are needed at runtime
420                    if let Some(datum_var) = self.datum_var {
421                        self.dataset_column_usage =
422                            self.dataset_column_usage.with_unknown_usage(datum_var);
423                    }
424                }
425            }
426        } else if node.callee.as_str() == "intersect" {
427            // Look for expression like:
428            //     intersect(arg0, {markname: "view_10_marks"}, ...)
429            // In this case "view_10_marks" is the name of a mark dataset, and we don't know what columns
430            // from the mark's source dataset are used.
431            if let Some(Expression {
432                expr: Some(Expr::Object(arg1)),
433                ..
434            }) = node.arguments.get(1)
435            {
436                for prop in &arg1.properties {
437                    if let (Some(key), Some(val)) = (&prop.key, &prop.value) {
438                        let property = match key {
439                            Key::Identifier(id) => id.name.clone(),
440                            Key::Literal(Literal {
441                                value: Some(Value::String(name)),
442                                ..
443                            }) => name.clone(),
444                            _ => continue,
445                        };
446                        if property == "markname" {
447                            if let Some(Expr::Literal(Literal {
448                                value: Some(Value::String(mark_name)),
449                                ..
450                            })) = &val.expr
451                            {
452                                let mark_data_var = Variable::new_data(mark_name);
453                                if let Ok(resolved) = self
454                                    .task_scope
455                                    .resolve_scope(&mark_data_var, self.usage_scope)
456                                {
457                                    let scoped_reference_data_var: ScopedVariable =
458                                        (resolved.var, resolved.scope);
459                                    self.dataset_column_usage = self
460                                        .dataset_column_usage
461                                        .with_unknown_usage(&scoped_reference_data_var);
462                                }
463                            }
464                        }
465                    }
466                }
467            }
468        }
469    }
470}