1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
use crate::schema::RootDataType;

// TODO: support nested projection (projecting individual columns within a struct type)

/// Specifies which column indices to project from an ORC type.
///
/// A mask either places no restriction on the columns (see
/// [`ProjectionMask::all`]) or carries an explicit list of selected column
/// indices (see [`ProjectionMask::roots`] and [`ProjectionMask::named_roots`]).
#[derive(Debug, Clone)]
pub struct ProjectionMask {
    /// Indices of column in ORC type, can refer to nested types
    /// (not only root level columns).
    ///
    /// `None` means no restriction: every index is reported as projected.
    indices: Option<Vec<usize>>,
}

impl ProjectionMask {
    /// Project all columns.
    ///
    /// The returned mask reports every index as projected.
    pub fn all() -> Self {
        Self { indices: None }
    }

    /// Project only specific columns from the root type by column index.
    ///
    /// Each requested index is matched against the column index of the direct
    /// children of `root_data_type`. For every matching child, all indices
    /// returned by its data type's `all_indices()` are projected. The root
    /// (index 0) is always projected. Requested indices that match no direct
    /// child are silently ignored, and duplicates in the request have no effect.
    pub fn roots(root_data_type: &RootDataType, indices: impl IntoIterator<Item = usize>) -> Self {
        // TODO: return error if column index not found?
        // Hash the request once so every child costs a single lookup instead of
        // a linear scan over the requested indices.
        let requested = indices
            .into_iter()
            .collect::<std::collections::HashSet<usize>>();
        // By default always project root
        let mut indices = vec![0];
        for col in root_data_type.children().iter() {
            let data_type = col.data_type();
            if requested.contains(&data_type.column_index()) {
                indices.extend(data_type.all_indices());
            }
        }
        Self {
            indices: Some(indices),
        }
    }

    /// Project only specific columns from the root type by column name.
    ///
    /// Each requested name is matched against the names of the direct children
    /// of `root_data_type`. For every matching child, all indices returned by
    /// its data type's `all_indices()` are projected. The root (index 0) is
    /// always projected. Names that match no direct child are silently ignored.
    pub fn named_roots<T>(root_data_type: &RootDataType, names: &[T]) -> Self
    where
        T: AsRef<str>,
    {
        // TODO: return error if column name not found?
        // Hash the request once so every child costs a single lookup instead of
        // a linear scan over the requested names.
        let requested = names
            .iter()
            .map(AsRef::as_ref)
            .collect::<std::collections::HashSet<&str>>();
        // By default always project root
        let mut indices = vec![0];
        for col in root_data_type.children().iter() {
            if requested.contains(col.name()) {
                indices.extend(col.data_type().all_indices());
            }
        }
        Self {
            indices: Some(indices),
        }
    }

    /// Check whether the ORC column with the given index is projected.
    ///
    /// Returns `true` for every index when the mask was created with
    /// [`ProjectionMask::all`]; otherwise returns `true` only if `index` is in
    /// the stored list of selected indices.
    pub fn is_index_projected(&self, index: usize) -> bool {
        match &self.indices {
            Some(indices) => indices.contains(&index),
            None => true,
        }
    }
}