use std::sync::Arc;
use arrow::datatypes::{Field, Fields, Schema, SchemaRef};
use datafusion::error::Result;
/// Incrementally collects the fields that make up a table schema.
///
/// Fields are tracked in two groups: those stored in `file_fields` and those
/// stored in `partition_fields`. The groups are kept separate so that the
/// schema produced from this builder can tell them apart.
#[derive(Debug, Clone)]
pub struct TableSchemaBuilder {
    /// Fields accumulated for the file portion of the schema, in insertion order.
    file_fields: Vec<Field>,
    /// Fields accumulated for the partition portion of the schema, in insertion order.
    partition_fields: Vec<Field>,
}
impl Default for TableSchemaBuilder {
fn default() -> Self {
Self::new()
}
}
impl TableSchemaBuilder {
    /// Creates a builder with no file fields and no partition fields.
    pub fn new() -> Self {
        Self {
            file_fields: Vec::new(),
            partition_fields: Vec::new(),
        }
    }

    /// Creates a builder seeded with `file_fields` and no partition fields.
    ///
    /// NOTE(review): the name looks like a typo for `new_with_file_fields`;
    /// it is left unchanged so existing callers keep compiling.
    pub fn new_with_field_fields(file_fields: Vec<Field>) -> Self {
        Self {
            file_fields,
            partition_fields: Vec::new(),
        }
    }

    /// Appends `fields` after any file fields already held and returns the builder.
    pub fn add_file_fields(mut self, fields: Vec<Field>) -> Self {
        self.file_fields.extend(fields);
        self
    }

    /// Appends `fields` after any partition fields already held and returns the builder.
    pub fn add_partition_fields(mut self, fields: Vec<Field>) -> Self {
        self.partition_fields.extend(fields);
        self
    }

    /// Consumes the builder and produces a [`TableSchema`].
    ///
    /// The combined schema lists every file field first, followed by every
    /// partition field, each group in insertion order. The file projection
    /// handed to the `TableSchema` is therefore `0..file_fields.len()`.
    pub fn build(self) -> TableSchema {
        let Self {
            file_fields,
            partition_fields,
        } = self;

        // Record the count before the vector is moved; the leading
        // `file_field_count` columns of the combined schema are the file columns.
        let file_field_count = file_fields.len();

        // The builder is owned here, so both vectors can be moved rather than cloned.
        let mut fields = file_fields;
        fields.extend(partition_fields);

        let file_projection: Vec<usize> = (0..file_field_count).collect();
        TableSchema::new(Arc::new(Schema::new(fields)), file_projection)
    }
}
/// A schema paired with the column indices that select its file-backed subset.
#[derive(Debug, Clone)]
pub struct TableSchema {
/// The complete schema, returned as-is by `table_schema`.
schema: SchemaRef,
/// Column indices into `schema`; `file_schema` projects `schema` with them.
file_projection: Vec<usize>,
}
impl TableSchema {
    /// Pairs `schema` with `file_projection`, the column indices that
    /// [`file_schema`](Self::file_schema) later uses to project `schema`.
    ///
    /// The indices are not validated here; any problem with them is reported
    /// when `file_schema` is called.
    pub fn new(schema: SchemaRef, file_projection: Vec<usize>) -> Self {
        Self {
            schema,
            file_projection,
        }
    }

    /// Returns a new schema containing only the columns selected by the
    /// stored file projection.
    ///
    /// # Errors
    ///
    /// Returns `DataFusionError::Execution` carrying the debug rendering of
    /// the underlying error if `Schema::project` rejects the projection
    /// (presumably an index outside the schema; see the arrow documentation).
    pub fn file_schema(&self) -> Result<SchemaRef> {
        // `project` already yields an owned `Schema`, so it is moved into the
        // `Arc` directly instead of being borrowed and cloned.
        let projected = self
            .schema
            .project(&self.file_projection)
            .map_err(|e| {
                datafusion::error::DataFusionError::Execution(format!(
                    "Error projecting schema: {:?}",
                    e
                ))
            })?;
        Ok(Arc::new(projected))
    }

    /// Returns a clone of the fields of the full schema.
    pub fn fields(&self) -> Fields {
        self.schema.fields().clone()
    }

    /// Returns another handle to the full schema.
    pub fn table_schema(&self) -> SchemaRef {
        // `SchemaRef` is an `Arc`; `Arc::clone` makes the refcount bump explicit.
        Arc::clone(&self.schema)
    }
}