orc_rust/
projection.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::schema::RootDataType;
19
// TODO: support nested projection (projecting individual columns within a struct type)
21
/// A selection of ORC column indices to read from an ORC type.
#[derive(Debug, Clone)]
pub struct ProjectionMask {
    /// Column indices within the ORC type that are projected. These may
    /// refer to nested types, not only to root level columns.
    ///
    /// `None` means no restriction is applied: every column is projected.
    indices: Option<Vec<usize>>,
}
29
30impl ProjectionMask {
31    /// Project all columns.
32    pub fn all() -> Self {
33        Self { indices: None }
34    }
35
36    /// Project only specific columns from the root type by column index.
37    pub fn roots(root_data_type: &RootDataType, indices: impl IntoIterator<Item = usize>) -> Self {
38        // TODO: return error if column index not found?
39        let input_indices = indices.into_iter().collect::<Vec<_>>();
40        // By default always project root
41        let mut indices = vec![0];
42        root_data_type
43            .children()
44            .iter()
45            .filter(|col| input_indices.contains(&col.data_type().column_index()))
46            .for_each(|col| indices.extend(col.data_type().all_indices()));
47        Self {
48            indices: Some(indices),
49        }
50    }
51
52    /// Project only specific columns from the root type by column name.
53    pub fn named_roots<T>(root_data_type: &RootDataType, names: &[T]) -> Self
54    where
55        T: AsRef<str>,
56    {
57        // TODO: return error if column name not found?
58        // By default always project root
59        let mut indices = vec![0];
60        let names = names.iter().map(AsRef::as_ref).collect::<Vec<_>>();
61        root_data_type
62            .children()
63            .iter()
64            .filter(|col| names.contains(&col.name()))
65            .for_each(|col| indices.extend(col.data_type().all_indices()));
66        Self {
67            indices: Some(indices),
68        }
69    }
70
71    /// Check if ORC column should is projected or not, by index.
72    pub fn is_index_projected(&self, index: usize) -> bool {
73        match &self.indices {
74            Some(indices) => indices.contains(&index),
75            None => true,
76        }
77    }
78}