orc_rust/projection.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::schema::RootDataType;
19
// TODO: support nested projection (projecting individual columns within a struct type)
21
/// Selects which columns of an ORC type are projected, identified by column index.
///
/// A mask either holds an explicit list of column indices or no list at all,
/// in which case every column is treated as projected.
#[derive(Clone, Debug)]
pub struct ProjectionMask {
    /// Column indices within the ORC type that are projected. These may point
    /// at nested types as well as root level columns. `None` means that all
    /// columns are projected.
    indices: Option<Vec<usize>>,
}
29
30impl ProjectionMask {
31 /// Project all columns.
32 pub fn all() -> Self {
33 Self { indices: None }
34 }
35
36 /// Project only specific columns from the root type by column index.
37 pub fn roots(root_data_type: &RootDataType, indices: impl IntoIterator<Item = usize>) -> Self {
38 // TODO: return error if column index not found?
39 let input_indices = indices.into_iter().collect::<Vec<_>>();
40 // By default always project root
41 let mut indices = vec![0];
42 root_data_type
43 .children()
44 .iter()
45 .filter(|col| input_indices.contains(&col.data_type().column_index()))
46 .for_each(|col| indices.extend(col.data_type().all_indices()));
47 Self {
48 indices: Some(indices),
49 }
50 }
51
52 /// Project only specific columns from the root type by column name.
53 pub fn named_roots<T>(root_data_type: &RootDataType, names: &[T]) -> Self
54 where
55 T: AsRef<str>,
56 {
57 // TODO: return error if column name not found?
58 // By default always project root
59 let mut indices = vec![0];
60 let names = names.iter().map(AsRef::as_ref).collect::<Vec<_>>();
61 root_data_type
62 .children()
63 .iter()
64 .filter(|col| names.contains(&col.name()))
65 .for_each(|col| indices.extend(col.data_type().all_indices()));
66 Self {
67 indices: Some(indices),
68 }
69 }
70
71 /// Check if ORC column should is projected or not, by index.
72 pub fn is_index_projected(&self, index: usize) -> bool {
73 match &self.indices {
74 Some(indices) => indices.contains(&index),
75 None => true,
76 }
77 }
78}