use arrow::datatypes::DataType as ArrowDataType;
use deltalake::kernel::DataType as DeltaDataType;
use evolution_builder::builder::ColumnBuilderRef;
use evolution_builder::datatype::{
BooleanColumnBuilder, Float16ColumnBuilder, Float32ColumnBuilder, Float64ColumnBuilder,
Int16ColumnBuilder, Int32ColumnBuilder, Int64ColumnBuilder, Utf8ColumnBuilder,
};
use evolution_common::datatype::DataType;
use evolution_parser::datatype::{BooleanParser, FloatParser, IntParser, Utf8Parser};
use log::warn;
use padder::{Alignment, Symbol};
use serde::{Deserialize, Serialize};
/// Marker trait for column definitions.
///
/// The trait declares no methods; in this file it is implemented by
/// [`FixedColumn`].
pub trait Column {}
/// Owned, heap-allocated trait object for any type implementing [`Column`].
pub type ColumnRef = Box<dyn Column>;
/// Description of a single column with a fixed `offset` and `length`.
///
/// The struct can be (de)serialized with serde. `alignment` and `pad_symbol`
/// are marked `#[serde(default)]`, so they fall back to their `Default`
/// values when absent from the input; the remaining fields carry no default
/// attribute.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
pub struct FixedColumn {
/// Column name; forwarded to the column builder in `as_column_builder`.
name: String,
/// Offset of the column.
/// NOTE(review): presumably the start position within a fixed-width row — confirm unit.
offset: usize,
/// Length of the column; forwarded to the column builder.
/// NOTE(review): presumably the field width in the fixed-width row — confirm unit.
length: usize,
/// Logical data type; drives the Arrow/Delta type mapping and builder choice.
dtype: DataType,
/// Alignment handed to the boolean, float and utf8 parsers.
#[serde(default)]
alignment: Alignment,
/// Padding symbol handed to the boolean, float and utf8 parsers.
#[serde(default)]
pad_symbol: Symbol,
/// Nullability flag; forwarded to the column builder.
is_nullable: bool,
}
impl FixedColumn {
    /// Creates a new [`FixedColumn`] from its individual parts.
    ///
    /// All arguments are stored as-is; no validation is performed here.
    pub fn new(
        name: String,
        offset: usize,
        length: usize,
        dtype: DataType,
        alignment: Alignment,
        pad_symbol: Symbol,
        is_nullable: bool,
    ) -> Self {
        Self {
            name,
            offset,
            length,
            dtype,
            alignment,
            pad_symbol,
            is_nullable,
        }
    }

    /// Borrows the name of the column.
    pub fn name(&self) -> &String {
        &self.name
    }

    /// Returns the offset of the column.
    pub fn offset(&self) -> usize {
        self.offset
    }

    /// Returns the length of the column.
    pub fn length(&self) -> usize {
        self.length
    }

    /// Returns a copy of the column's logical data type.
    pub fn dtype(&self) -> DataType {
        self.dtype
    }

    /// Returns a copy of the column's alignment.
    pub fn alignment(&self) -> Alignment {
        self.alignment
    }

    /// Returns a copy of the column's padding symbol.
    pub fn pad_symbol(&self) -> Symbol {
        self.pad_symbol
    }

    /// Returns whether the column is flagged as nullable.
    pub fn is_nullable(&self) -> bool {
        self.is_nullable
    }

    /// Maps the column's data type to the Arrow data type of the same name.
    pub fn as_arrow_dtype(&self) -> ArrowDataType {
        let dtype = self.dtype;
        match dtype {
            DataType::Boolean => ArrowDataType::Boolean,
            DataType::Float16 => ArrowDataType::Float16,
            DataType::Float32 => ArrowDataType::Float32,
            DataType::Float64 => ArrowDataType::Float64,
            DataType::Int16 => ArrowDataType::Int16,
            DataType::Int32 => ArrowDataType::Int32,
            DataType::Int64 => ArrowDataType::Int64,
            DataType::Utf8 => ArrowDataType::Utf8,
            DataType::LargeUtf8 => ArrowDataType::LargeUtf8,
        }
    }

    /// Maps the column's data type to a Delta data type.
    ///
    /// The mapping is not one-to-one: `Float16` is reported as `FLOAT`
    /// (a warning is logged when that happens), and both `Utf8` and
    /// `LargeUtf8` are reported as `STRING`.
    pub fn as_delta_dtype(&self) -> DeltaDataType {
        let dtype = self.dtype;
        match dtype {
            DataType::Boolean => DeltaDataType::BOOLEAN,
            DataType::Float16 => {
                // Float16 is widened to FLOAT here; let the user know.
                warn!("Casting Float16 to Float32 for deltalake compatibility.");
                DeltaDataType::FLOAT
            }
            DataType::Float32 => DeltaDataType::FLOAT,
            DataType::Float64 => DeltaDataType::DOUBLE,
            DataType::Int16 => DeltaDataType::SHORT,
            DataType::Int32 => DeltaDataType::INTEGER,
            DataType::Int64 => DeltaDataType::LONG,
            DataType::Utf8 | DataType::LargeUtf8 => DeltaDataType::STRING,
        }
    }

    /// Creates a boxed column builder matching the column's data type.
    ///
    /// Every builder receives a clone of the column name, the column length
    /// and the nullability flag. Boolean, float and utf8 builders get a parser
    /// configured with the column's alignment and padding symbol, whereas the
    /// integer parsers are constructed without any arguments. `LargeUtf8`
    /// columns use the same builder as `Utf8` columns.
    pub fn as_column_builder(&self) -> ColumnBuilderRef {
        // Shared constructor arguments, gathered once up front.
        let name = self.name.clone();
        let (length, nullable) = (self.length, self.is_nullable);
        let (alignment, pad_symbol) = (self.alignment, self.pad_symbol);

        match self.dtype {
            DataType::Boolean => Box::new(BooleanColumnBuilder::new(
                name,
                length,
                nullable,
                BooleanParser::new(alignment, pad_symbol),
            )),
            DataType::Float16 => Box::new(Float16ColumnBuilder::new(
                name,
                length,
                nullable,
                FloatParser::new(alignment, pad_symbol),
            )),
            DataType::Float32 => Box::new(Float32ColumnBuilder::new(
                name,
                length,
                nullable,
                FloatParser::new(alignment, pad_symbol),
            )),
            DataType::Float64 => Box::new(Float64ColumnBuilder::new(
                name,
                length,
                nullable,
                FloatParser::new(alignment, pad_symbol),
            )),
            DataType::Int16 => Box::new(Int16ColumnBuilder::new(
                name,
                length,
                nullable,
                IntParser::new(),
            )),
            DataType::Int32 => Box::new(Int32ColumnBuilder::new(
                name,
                length,
                nullable,
                IntParser::new(),
            )),
            DataType::Int64 => Box::new(Int64ColumnBuilder::new(
                name,
                length,
                nullable,
                IntParser::new(),
            )),
            DataType::Utf8 | DataType::LargeUtf8 => Box::new(Utf8ColumnBuilder::new(
                name,
                length,
                nullable,
                Utf8Parser::new(alignment, pad_symbol),
            )),
        }
    }
}
// `Column` declares no methods, so this empty impl only marks `FixedColumn` as a column type.
impl Column for FixedColumn {}
#[cfg(test)]
mod tests_column {
    use super::*;
    use std::fs;
    use std::path::PathBuf;

    /// Reads a test fixture given its path relative to the crate root.
    ///
    /// Panics with a descriptive message when the file cannot be read, so a
    /// missing fixture shows up as a test failure instead of being mistaken
    /// for a deserialization error.
    fn read_fixture(relative_path: &str) -> Vec<u8> {
        let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        path.push(relative_path);
        fs::read(&path).unwrap_or_else(|err| {
            panic!("failed to read test fixture {}: {}", path.display(), err)
        })
    }

    /// A valid column definition deserializes, and its accessors and type
    /// conversions can be compared against a hand-built column.
    #[test]
    fn test_deserialize_column_from_file() {
        let bytes = read_fixture("res/test_valid_column.json");
        let a: FixedColumn = serde_json::from_slice(&bytes).unwrap();
        let b = FixedColumn::new(
            String::from("NotCoolColumn"),
            0,
            2,
            DataType::Float16,
            Alignment::Center,
            Symbol::Asterisk,
            true,
        );

        // The fixture is expected to differ from `b` in everything except the
        // alignment and the resulting delta data type.
        assert_ne!(a.name(), b.name());
        assert_ne!(a.offset(), b.offset());
        assert_ne!(a.length(), b.length());
        assert_ne!(a.dtype(), b.dtype());
        assert_eq!(a.alignment(), b.alignment());
        assert_ne!(a.pad_symbol(), b.pad_symbol());
        assert_ne!(a.is_nullable(), b.is_nullable());
        assert_ne!(a.as_arrow_dtype(), b.as_arrow_dtype());
        assert_eq!(a.as_delta_dtype(), b.as_delta_dtype());
    }

    /// An invalid column definition must be rejected by the deserializer.
    ///
    /// The result is asserted explicitly rather than via `#[should_panic]`,
    /// because a panic from a missing or unreadable fixture would otherwise
    /// make the test pass for the wrong reason.
    #[test]
    fn test_deserialize_invalid_column_from_file() {
        let bytes = read_fixture("res/test_invalid_column.json");
        let parsed: Result<FixedColumn, _> = serde_json::from_slice(&bytes);
        assert!(
            parsed.is_err(),
            "expected deserialization of the invalid column fixture to fail"
        );
    }
}