vegafusion_core/expression/
column_usage.rs

1use crate::task_graph::graph::ScopedVariable;
2use crate::task_graph::scope::TaskScope;
3use std::collections::{HashMap, HashSet};
4
/// Map from a scoped variable to the `ColumnUsage` associated with it.
///
/// NOTE(review): judging by the name, the keys are presumably Vega-Lite selection
/// datasets and the values the fields stored in them — confirm against callers.
pub type VlSelectionFields = HashMap<ScopedVariable, ColumnUsage>;
6
/// Describes which columns of a dataset are referenced in a given context.
///
/// Vega specifications are dynamic, so the set of columns used at runtime cannot
/// always be determined statically. When it cannot, `ColumnUsage::Unknown` is
/// used. In the context of projection pushdown, `ColumnUsage::Unknown` means that
/// all of the original dataset's columns must be maintained.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum ColumnUsage {
    /// The set of columns in use could not be determined.
    Unknown,
    /// The names of the columns that are in use.
    Known(HashSet<String>),
}
18
19impl ColumnUsage {
20    pub fn empty() -> ColumnUsage {
21        ColumnUsage::Known(Default::default())
22    }
23
24    pub fn with_column(&self, column: &str) -> ColumnUsage {
25        self.union(&ColumnUsage::from(vec![column].as_slice()))
26    }
27
28    /// Take the union of two ColumnUsage instances. If both are ColumnUsage::Known, then take
29    /// the union of their known columns. If either is ColumnUsage::Unknown, then the union is
30    /// also Unknown.
31    pub fn union(&self, other: &ColumnUsage) -> ColumnUsage {
32        match (self, other) {
33            (ColumnUsage::Known(self_cols), ColumnUsage::Known(other_cols)) => {
34                // If both column usages are known, we can union the known columns
35                let new_cols: HashSet<_> = self_cols.union(other_cols).cloned().collect();
36                ColumnUsage::Known(new_cols)
37            }
38            _ => {
39                // If either is Unknown, then the union is unknown
40                ColumnUsage::Unknown
41            }
42        }
43    }
44
45    pub fn difference(&self, other: &ColumnUsage) -> ColumnUsage {
46        match (self, other) {
47            (ColumnUsage::Known(self_cols), ColumnUsage::Known(other_cols)) => {
48                // If both column usages are known, we can take the set difference the known columns
49                let new_cols: HashSet<_> = self_cols.difference(other_cols).cloned().collect();
50                ColumnUsage::Known(new_cols)
51            }
52            _ => {
53                // If either is Unknown, then the difference is unknown
54                ColumnUsage::Unknown
55            }
56        }
57    }
58}
59
60impl From<&str> for ColumnUsage {
61    fn from(column: &str) -> Self {
62        let columns: HashSet<_> = vec![column.to_string()].into_iter().collect();
63        Self::Known(columns)
64    }
65}
66
67impl From<&[&str]> for ColumnUsage {
68    fn from(columns: &[&str]) -> Self {
69        let columns: HashSet<_> = columns.iter().map(|s| s.to_string()).collect();
70        Self::Known(columns)
71    }
72}
73
74impl From<&[String]> for ColumnUsage {
75    fn from(columns: &[String]) -> Self {
76        let columns: HashSet<_> = columns.iter().cloned().collect();
77        Self::Known(columns)
78    }
79}
80
81/// Struct that tracks the usage of all columns across a collection of datasets
82#[derive(Clone, Debug, PartialEq, Eq)]
83pub struct DatasetsColumnUsage {
84    pub usages: HashMap<ScopedVariable, ColumnUsage>,
85    pub aliases: HashMap<ScopedVariable, ScopedVariable>,
86}
87
88impl DatasetsColumnUsage {
89    pub fn empty() -> Self {
90        Self {
91            usages: Default::default(),
92            aliases: Default::default(),
93        }
94    }
95
96    pub fn with_column_usage(&self, datum_var: &ScopedVariable, usage: ColumnUsage) -> Self {
97        let other_column_usage = Self {
98            usages: vec![(datum_var.clone(), usage)].into_iter().collect(),
99            aliases: Default::default(),
100        };
101        self.union(&other_column_usage)
102    }
103
104    pub fn with_unknown_usage(&self, datum_var: &ScopedVariable) -> Self {
105        self.with_column_usage(datum_var, ColumnUsage::Unknown)
106    }
107
108    pub fn without_column_usage(&self, datum_var: &ScopedVariable, usage: &ColumnUsage) -> Self {
109        let mut new_usages = self.usages.clone();
110        if let Some(current_usage) = new_usages.get(datum_var) {
111            let new_usage = current_usage.difference(usage);
112            new_usages.insert(datum_var.clone(), new_usage);
113        }
114        Self {
115            usages: new_usages,
116            aliases: self.aliases.clone(),
117        }
118    }
119
120    pub fn with_alias(&self, from: ScopedVariable, to: ScopedVariable) -> Self {
121        let mut aliases = self.aliases.clone();
122        aliases.insert(from, to);
123        Self {
124            usages: self.usages.clone(),
125            aliases,
126        }
127    }
128
129    fn apply_aliases(&self, aliases: &HashMap<ScopedVariable, ScopedVariable>) -> Self {
130        let mut new_usages = self.usages.clone();
131        for var in new_usages.keys().cloned().collect::<Vec<_>>() {
132            if let Some(target) = aliases.get(&var) {
133                let aliased = new_usages.remove(&var).unwrap();
134                new_usages.insert(target.clone(), aliased);
135            }
136        }
137
138        // Union aliases
139        let mut new_aliases = self.aliases.clone();
140        for (key, val) in aliases {
141            new_aliases.insert(key.clone(), val.clone());
142        }
143        Self {
144            usages: new_usages,
145            aliases: new_aliases,
146        }
147    }
148
149    /// Take the union of two DatasetColumnUsage instances.
150    pub fn union(&self, other: &DatasetsColumnUsage) -> DatasetsColumnUsage {
151        // Union aliases
152        let mut aliases = self.aliases.clone();
153        for (key, val) in &other.aliases {
154            aliases.insert(key.clone(), val.clone());
155        }
156
157        // Apply aliases
158        let unaliased_self_usages = self.apply_aliases(&aliases).usages;
159        let unaliased_other_usages = other.apply_aliases(&aliases).usages;
160
161        let self_vars: HashSet<_> = unaliased_self_usages.keys().cloned().collect();
162        let other_vars: HashSet<_> = unaliased_other_usages.keys().cloned().collect();
163        let union_vars: HashSet<_> = self_vars.union(&other_vars).cloned().collect();
164
165        let mut usages: HashMap<ScopedVariable, ColumnUsage> = HashMap::new();
166        for var in union_vars {
167            let self_usage = unaliased_self_usages
168                .get(&var)
169                .cloned()
170                .unwrap_or_else(ColumnUsage::empty);
171            let other_usage = unaliased_other_usages
172                .get(&var)
173                .cloned()
174                .unwrap_or_else(ColumnUsage::empty);
175            let combined_usage = self_usage.union(&other_usage);
176            usages.insert(var, combined_usage);
177        }
178
179        Self { usages, aliases }
180    }
181}
182
/// Implemented by types that can report their dataset column usage as a
/// `DatasetsColumnUsage`.
pub trait GetDatasetsColumnUsage {
    /// Compute the dataset column usage of `self`.
    ///
    /// NOTE(review): the parameter descriptions below are inferred from names and
    /// types only — confirm against the implementors.
    ///
    /// * `datum_var` - presumably the dataset variable that `datum` references
    ///   resolve to, or `None` when there is no such dataset
    /// * `usage_scope` - presumably the scope path at which `self` is evaluated
    /// * `task_scope` - presumably used to resolve variable names to scoped variables
    /// * `vl_selection_fields` - column usage per scoped variable, see `VlSelectionFields`
    fn datasets_column_usage(
        &self,
        datum_var: &Option<ScopedVariable>,
        usage_scope: &[u32],
        task_scope: &TaskScope,
        vl_selection_fields: &VlSelectionFields,
    ) -> DatasetsColumnUsage;
}
192
#[cfg(test)]
mod tests {
    use crate::expression::column_usage::ColumnUsage;

    /// Build a `ColumnUsage::Known` from a list of column names.
    fn known(names: &[&str]) -> ColumnUsage {
        ColumnUsage::from(names)
    }

    #[test]
    fn test_with_column() {
        let actual = known(&["one", "two"])
            .with_column("three")
            .with_column("four");
        assert_eq!(actual, known(&["one", "two", "three", "four"]));
    }

    #[test]
    fn test_union_known_known() {
        let lhs = known(&["one", "two"]);
        let rhs = known(&["two", "three", "four"]);
        assert_eq!(lhs.union(&rhs), known(&["one", "two", "three", "four"]));
    }

    #[test]
    fn test_union_known_unknown() {
        let lhs = known(&["one", "two"]);
        assert_eq!(lhs.union(&ColumnUsage::Unknown), ColumnUsage::Unknown);
    }

    #[test]
    fn test_union_unknown_known() {
        let rhs = known(&["two", "three", "four"]);
        assert_eq!(ColumnUsage::Unknown.union(&rhs), ColumnUsage::Unknown);
    }

    #[test]
    fn test_union_unknown_unknown() {
        assert_eq!(
            ColumnUsage::Unknown.union(&ColumnUsage::Unknown),
            ColumnUsage::Unknown
        );
    }

    #[test]
    fn test_difference_known_known() {
        let lhs = known(&["one", "two", "three"]);
        let rhs = known(&["three", "four"]);
        assert_eq!(lhs.difference(&rhs), known(&["one", "two"]));
    }

    #[test]
    fn test_difference_known_unknown() {
        let lhs = known(&["one", "two"]);
        assert_eq!(lhs.difference(&ColumnUsage::Unknown), ColumnUsage::Unknown);
    }

    #[test]
    fn test_difference_unknown_known() {
        let rhs = known(&["two", "three", "four"]);
        assert_eq!(ColumnUsage::Unknown.difference(&rhs), ColumnUsage::Unknown);
    }

    #[test]
    fn test_difference_unknown_unknown() {
        assert_eq!(
            ColumnUsage::Unknown.difference(&ColumnUsage::Unknown),
            ColumnUsage::Unknown
        );
    }
}