#![cfg_attr(all(doc, NIGHTLY_CHANNEL), feature(doc_auto_cfg))]
#![warn(
unreachable_pub,
trivial_numeric_casts,
unused_extern_crates,
rust_2018_idioms,
rust_2021_compatibility
)]
use std::any::Any;
use std::fs::DirEntry;
use std::sync::Arc;
use std::time::SystemTime;
use std::{cmp::Ordering, ops::Range};
use bytes::Bytes;
use url::Url;
use self::schema::{DataType, SchemaRef};
pub mod actions;
pub mod engine_data;
pub mod error;
pub mod expressions;
pub mod scan;
pub mod schema;
pub mod snapshot;
pub mod table;
pub mod table_changes;
pub mod table_features;
pub mod table_properties;
pub mod transaction;
pub(crate) mod predicates;
pub(crate) mod utils;
#[cfg(feature = "developer-visibility")]
pub mod path;
#[cfg(not(feature = "developer-visibility"))]
pub(crate) mod path;
#[cfg(feature = "developer-visibility")]
pub mod log_segment;
#[cfg(not(feature = "developer-visibility"))]
pub(crate) mod log_segment;
pub use delta_kernel_derive;
pub use engine_data::{EngineData, RowVisitor};
pub use error::{DeltaResult, Error};
pub use expressions::{Expression, ExpressionRef};
pub use table::Table;
#[cfg(any(
feature = "default-engine",
feature = "sync-engine",
feature = "arrow-conversion"
))]
pub mod engine;
/// Version-number type used throughout the crate; an alias for `u64`.
pub type Version = u64;
/// A file location paired with an optional `usize` range within that file.
///
/// NOTE(review): the range is presumably a byte range, with `None` meaning "the whole file" --
/// confirm against the `FileSystemClient::read_files` implementations.
pub type FileSlice = (Url, Option<Range<usize>>);
/// A file's [`FileMeta`] paired with a boxed [`EngineData`].
///
/// NOTE(review): presumably the data that was read from the described file -- confirm against
/// the code that produces values of this type.
pub type FileDataReadResult = (FileMeta, Box<dyn EngineData>);
/// Boxed, `Send` iterator whose items are `DeltaResult<Box<dyn EngineData>>`.
///
/// This is the return type of [`JsonHandler::read_json_files`] and
/// [`ParquetHandler::read_parquet_files`]. Note that, unlike [`FileDataReadResult`], the items
/// do not carry a [`FileMeta`].
pub type FileDataReadResultIterator =
Box<dyn Iterator<Item = DeltaResult<Box<dyn EngineData>>> + Send>;
/// Metadata describing a single file: where it lives, when it was last modified, and its size.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileMeta {
/// URL locating the file.
pub location: Url,
/// Last-modification timestamp. The `TryFrom<DirEntry>` conversion stores milliseconds since
/// the UNIX epoch here.
pub last_modified: i64,
/// Size of the file. The `TryFrom<DirEntry>` conversion stores the length reported by the
/// filesystem metadata (a byte count).
pub size: usize,
}
impl Ord for FileMeta {
    /// Total order over [`FileMeta`] values.
    ///
    /// `location` is the primary sort key, so sorting a collection still groups and orders
    /// entries by URL. Ties are broken by `last_modified` and then `size` so that
    /// `Ordering::Equal` is returned exactly when the derived `==` holds, as the
    /// `Ord`/`PartialOrd`/`Eq` contract requires. Comparing on `location` alone would report
    /// two entries with different metadata as `Equal` even though they are `!=`.
    fn cmp(&self, other: &Self) -> Ordering {
        self.location
            .cmp(&other.location)
            .then_with(|| self.last_modified.cmp(&other.last_modified))
            .then_with(|| self.size.cmp(&other.size))
    }
}
impl PartialOrd for FileMeta {
    /// Always yields `Some`: the result is taken directly from the [`Ord`] implementation so
    /// the partial and total orderings can never disagree.
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}
impl TryFrom<DirEntry> for FileMeta {
type Error = Error;
fn try_from(ent: DirEntry) -> DeltaResult<FileMeta> {
let metadata = ent.metadata()?;
let last_modified = metadata
.modified()?
.duration_since(SystemTime::UNIX_EPOCH)
.map_err(|_| Error::generic("Failed to convert file timestamp to milliseconds"))?;
let location = Url::from_file_path(ent.path())
.map_err(|_| Error::generic(format!("Invalid path: {:?}", ent.path())))?;
let last_modified = last_modified.as_millis().try_into().map_err(|_| {
Error::generic(format!(
"Failed to convert file modification time {:?} into i64",
last_modified.as_millis()
))
})?;
Ok(FileMeta {
location,
last_modified,
size: metadata.len() as usize,
})
}
}
impl FileMeta {
pub fn new(location: Url, last_modified: i64, size: usize) -> Self {
Self {
location,
last_modified,
size,
}
}
}
/// Conversions from a concrete `Any + Send + Sync` type to the corresponding type-erased
/// `dyn Any + Send + Sync`, by reference, by `Arc`, or by `Box`.
///
/// A blanket implementation in this file covers every `T: Any + Send + Sync`.
pub trait AsAny: Any + Send + Sync {
/// Borrow `self` as a `dyn Any + Send + Sync` reference.
fn any_ref(&self) -> &(dyn Any + Send + Sync);
/// Convert an `Arc<Self>` into an `Arc<dyn Any + Send + Sync>`.
fn as_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;
/// Convert a `Box<Self>` into a `Box<dyn Any + Send + Sync>`.
fn into_any(self: Box<Self>) -> Box<dyn Any + Send + Sync>;
/// Name of the implementing type, as a static string.
fn type_name(&self) -> &'static str;
}
/// Blanket implementation: every `Any + Send + Sync` type gets [`AsAny`] for free.
///
/// The three conversion methods rely purely on unsized coercion of `self` to the matching
/// `dyn Any + Send + Sync` pointer type.
impl<T: Any + Send + Sync> AsAny for T {
    /// View `self` through a type-erased reference.
    fn any_ref(&self) -> &(dyn Any + Send + Sync) {
        self
    }

    /// Erase the concrete type behind an `Arc`, keeping the same allocation.
    fn as_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self
    }

    /// Erase the concrete type behind a `Box`, keeping the same allocation.
    fn into_any(self: Box<Self>) -> Box<dyn Any + Send + Sync> {
        self
    }

    /// Report the name of the concrete type `T` via `std::any::type_name`.
    fn type_name(&self) -> &'static str {
        std::any::type_name::<T>()
    }
}
/// An evaluator that turns one batch of engine data into another.
///
/// Instances are obtained from [`ExpressionHandler::get_evaluator`].
pub trait ExpressionEvaluator: AsAny {
/// Evaluate against `batch`, returning the result as new boxed [`EngineData`], or an error.
///
/// NOTE(review): presumably applies the expression supplied to `get_evaluator` and yields
/// data of the requested output type -- confirm against the engine implementations.
fn evaluate(&self, batch: &dyn EngineData) -> DeltaResult<Box<dyn EngineData>>;
}
/// Factory for [`ExpressionEvaluator`]s.
pub trait ExpressionHandler: AsAny {
/// Create a shared evaluator for `expression`.
///
/// # Parameters
///
/// * `schema` - NOTE(review): presumably the schema of the input batches later passed to
///   `evaluate` -- confirm.
/// * `expression` - the expression the evaluator is built for.
/// * `output_type` - NOTE(review): presumably the data type the evaluation result must
///   have -- confirm.
fn get_evaluator(
&self,
schema: SchemaRef,
expression: Expression,
output_type: DataType,
) -> Arc<dyn ExpressionEvaluator>;
}
/// File-system access used through the engine: listing files and reading file contents.
pub trait FileSystemClient: AsAny {
/// Produce an iterator of [`FileMeta`] entries for `path`; each item may itself be an error.
///
/// NOTE(review): the exact listing semantics (which entries are included relative to `path`,
/// and in what order) are not visible here -- confirm against the implementations.
fn list_from(&self, path: &Url)
-> DeltaResult<Box<dyn Iterator<Item = DeltaResult<FileMeta>>>>;
/// Read the requested file slices, yielding `Bytes` (or an error) per item.
///
/// NOTE(review): presumably one item is produced per entry of `files`, in the same order,
/// with a `None` range meaning the whole file -- confirm.
fn read_files(
&self,
files: Vec<FileSlice>,
) -> DeltaResult<Box<dyn Iterator<Item = DeltaResult<Bytes>>>>;
}
/// JSON support used through the engine: parsing JSON held in engine data, reading JSON files,
/// and writing a JSON file.
pub trait JsonHandler: AsAny {
/// Parse `json_strings` into engine data described by `output_schema`.
///
/// NOTE(review): `json_strings` presumably holds one JSON document per row in a single
/// string column -- confirm against the implementations.
fn parse_json(
&self,
json_strings: Box<dyn EngineData>,
output_schema: SchemaRef,
) -> DeltaResult<Box<dyn EngineData>>;
/// Read the given JSON `files`, returning an iterator of engine-data batches.
///
/// # Parameters
///
/// * `files` - metadata of the files to read.
/// * `physical_schema` - NOTE(review): presumably the schema the returned data must
///   conform to -- confirm.
/// * `predicate` - optional expression; NOTE(review): presumably a filter hint that
///   implementations may use to skip data -- confirm whether applying it is mandatory.
fn read_json_files(
&self,
files: &[FileMeta],
physical_schema: SchemaRef,
predicate: Option<ExpressionRef>,
) -> DeltaResult<FileDataReadResultIterator>;
/// Write the batches produced by `data` as JSON to `path`.
///
/// # Parameters
///
/// * `path` - destination URL.
/// * `data` - iterator of batches to write; any item may be an error.
/// * `overwrite` - NOTE(review): presumably controls whether an existing file at `path`
///   may be replaced -- confirm the behavior when it is `false` and the file exists.
fn write_json_file(
&self,
path: &Url,
data: Box<dyn Iterator<Item = DeltaResult<Box<dyn EngineData>>> + Send + '_>,
overwrite: bool,
) -> DeltaResult<()>;
}
/// Parquet support used through the engine: reading Parquet files into engine data.
pub trait ParquetHandler: AsAny {
/// Read the given Parquet `files`, returning an iterator of engine-data batches.
///
/// # Parameters
///
/// * `files` - metadata of the files to read.
/// * `physical_schema` - NOTE(review): presumably the schema the returned data must
///   conform to -- confirm.
/// * `predicate` - optional expression; NOTE(review): presumably a filter hint that
///   implementations may use to skip data -- confirm whether applying it is mandatory.
fn read_parquet_files(
&self,
files: &[FileMeta],
physical_schema: SchemaRef,
predicate: Option<ExpressionRef>,
) -> DeltaResult<FileDataReadResultIterator>;
}
/// Entry point bundling the four handler traits; each accessor returns a shared (`Arc`)
/// trait object.
pub trait Engine: AsAny {
/// Handler used to create expression evaluators.
fn get_expression_handler(&self) -> Arc<dyn ExpressionHandler>;
/// Client used to list and read files.
fn get_file_system_client(&self) -> Arc<dyn FileSystemClient>;
/// Handler used to parse, read, and write JSON.
fn get_json_handler(&self) -> Arc<dyn JsonHandler>;
/// Handler used to read Parquet files.
fn get_parquet_handler(&self) -> Arc<dyn ParquetHandler>;
}