exon_common/
table_schema.rs

1// Copyright 2023 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::datatypes::{Field, Fields, Schema, SchemaRef};
18
19use datafusion::error::Result;
20
21/// A builder for `TableSchema`.
22pub struct TableSchemaBuilder {
23    file_fields: Vec<Field>,
24    partition_fields: Vec<Field>,
25}
26
27impl Default for TableSchemaBuilder {
28    fn default() -> Self {
29        Self::new()
30    }
31}
32
33impl TableSchemaBuilder {
34    /// Creates a new builder for `TableSchema`.
35    pub fn new() -> Self {
36        Self {
37            file_fields: Vec::new(),
38            partition_fields: Vec::new(),
39        }
40    }
41
42    /// Create a new builder with the passed file fields.
43    pub fn new_with_field_fields(file_fields: Vec<Field>) -> Self {
44        Self {
45            file_fields,
46            partition_fields: Vec::new(),
47        }
48    }
49
50    /// Adds file fields to the `TableSchema`.
51    pub fn add_file_fields(mut self, fields: Vec<Field>) -> Self {
52        self.file_fields.extend(fields);
53        self
54    }
55
56    /// Adds partition fields to the `TableSchema`.
57    pub fn add_partition_fields(mut self, fields: Vec<Field>) -> Self {
58        self.partition_fields.extend(fields);
59        self
60    }
61
62    /// Builds the `TableSchema`, taking a `SchemaRef` as an argument, and returning a Result.
63    pub fn build(self) -> TableSchema {
64        // Combine file_fields and partition_fields as needed to create a file_projection
65        // This is a simplistic approach, you might have more complex logic to combine these fields
66        let mut fields = self.file_fields.clone();
67        fields.extend(self.partition_fields);
68
69        let schema = Schema::new(fields);
70
71        let projection: Vec<usize> = (0..self.file_fields.len()).collect();
72
73        TableSchema::new(Arc::new(schema.clone()), projection.clone())
74    }
75}
76
77#[derive(Debug, Clone)]
78/// An object that holds the schema for a table and the projection of the file schema to the table schema.
79pub struct TableSchema {
80    schema: SchemaRef,
81    file_projection: Vec<usize>,
82}
83
84impl TableSchema {
85    /// Create a new table schema
86    pub fn new(schema: SchemaRef, file_projection: Vec<usize>) -> Self {
87        Self {
88            schema,
89            file_projection,
90        }
91    }
92
93    /// Get the schema for the underlying file
94    pub fn file_schema(&self) -> Result<SchemaRef> {
95        let file_schema = &self.schema.project(&self.file_projection).map_err(|e| {
96            datafusion::error::DataFusionError::Execution(format!(
97                "Error projecting schema: {:?}",
98                e
99            ))
100        })?;
101        Ok(Arc::new(file_schema.clone()))
102    }
103
104    /// Get the fields for the table
105    pub fn fields(&self) -> Fields {
106        self.schema.fields().clone()
107    }
108
109    /// Get the schema for the table
110    pub fn table_schema(&self) -> SchemaRef {
111        self.schema.clone()
112    }
113}